Skip to content

Commit

Permalink
Update create_protein_boxplot() to make it external
Browse files Browse the repository at this point in the history
  • Loading branch information
kantonopoulos committed Jul 10, 2024
1 parent 53ff091 commit b9c1459
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 20 deletions.
1 change: 0 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ Imports:
limma,
missForest,
parsnip,
purrr,
readxl,
recipes,
rlang,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(clean_metadata)
export(cluster_data)
export(create_corr_heatmap)
export(create_dir)
export(create_protein_boxplot)
export(do_elnet)
export(do_limma)
export(do_pca)
Expand Down
20 changes: 12 additions & 8 deletions R/classification_models.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
utils::globalVariables(c("roc_auc", ".config", ".pred_class", ".pred_0", "Scaled_Importance",
"Importance", "Variable", "std_err"))
utils::globalVariables(c(
".config", "test_data", "test_set", ".pred_class", ".pred_0",
"Scaled_Importance", "Importance", "Variable", "std_err"
))
#' Split data into training and test sets
#'
#' This function splits the data into training and test sets based on a user-defined ratio.
Expand Down Expand Up @@ -556,6 +552,8 @@ plot_var_imp <- function (finalfit_res,
#' @param palette (character or vector). The color palette for the plot. If it is a character, it should be one of the palettes from get_hpa_palettes(). Default is NULL.
#' @param vline (logical). Whether to add a vertical line at 50% importance. Default is TRUE.
#' @param subtitle (vector). Vector of subtitles to include in the plot. Default is a list with all.
#' @param nfeatures (numeric). Number of top features to include in the boxplot. Default is 9.
#' @param points (logical). Whether to add points to the boxplot. Default is TRUE.
#' @param seed (numeric). Seed for reproducibility. Default is 123.
#'
#' @return A list with results for each disease. The list contains:
Expand Down Expand Up @@ -662,12 +660,18 @@ do_elnet <- function(olink_data,
vline = vline,
subtitle)

top_features <- var_imp_res$features |>
dplyr::arrange(dplyr::desc(Scaled_Importance)) |>
dplyr::select(Variable) |>
dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |>
utils::head(nfeatures)
proteins <- top_features[['Variable']]

boxplot_res <- create_protein_boxplot(join_data,
proteins,
disease,
features = var_imp_res$features,
nfeatures = nfeatures,
palette = palette,
points = points)
points,
palette)

return(list("hypopt_res" = hypopt_res,
"finalfit_res" = finalfit_res,
Expand Down
35 changes: 24 additions & 11 deletions R/visualize.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,27 @@
utils::globalVariables(c("Value"))
#' Create boxplots for proteins
#'
#' This function creates boxplots for the selected proteins in the dataset.
#' It annotates the boxplot with color for the selected disease.
#' It is also possible to add points to the boxplot.
#'
#' @param join_data (tibble). The dataset with the wide Olink data joined with the metadata.
#' @param proteins (vector). The proteins to include in the boxplot.
#' @param disease (character). The disease to annotate.
#' @param points (logical). Whether to add points to the boxplot.
#' @param palette (character). The color palette to use. Default is NULL, in which case red3 is used.
#'
#' @return boxplot_panel (plot). The boxplot panel with the selected proteins.
#' @export
#'
#' @examples
#' wide_data <- widen_data(example_data, FALSE)
#' join_data <- wide_data |>
#' dplyr::left_join(example_metadata |> dplyr::select(DAid, Disease, Sex))
#' create_protein_boxplot(join_data, c("A1BG", "A2M"), "AML", palette = "cancers12")
create_protein_boxplot <- function(join_data,
features,
proteins,
disease,
nfeatures = 9,
points = T,
palette = NULL) {

Expand All @@ -16,18 +36,11 @@ create_protein_boxplot <- function(join_data,
pal <- "red3"
}

top_features <- features |>
dplyr::arrange(desc(Scaled_Importance)) |>
dplyr::select(Variable) |>
dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |>
head(nfeatures)
top_features <- top_features[['Variable']]

long_data <- join_data |>
dplyr::select(Disease, dplyr::all_of(top_features)) |>
dplyr::select(Disease, dplyr::all_of(proteins)) |>
tidyr::pivot_longer(cols = !Disease, names_to = "Protein", values_to = "Value")

long_data$Protein <- factor(long_data$Protein, levels = top_features, labels = top_features)
long_data$Protein <- factor(long_data$Protein, levels = proteins, labels = proteins)

# Create boxplot
boxplot <- long_data |>
Expand Down
39 changes: 39 additions & 0 deletions man/create_protein_boxplot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions man/do_elnet.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b9c1459

Please sign in to comment.