From b9c1459aa5c15c27a24610c0fbab3e0932721bf8 Mon Sep 17 00:00:00 2001 From: Konstantinos Antonopoulos Date: Wed, 10 Jul 2024 13:04:46 +0200 Subject: [PATCH] Update create_protein_boxplot() to make it external --- DESCRIPTION | 1 - NAMESPACE | 1 + R/classification_models.R | 20 +++++++++++------- R/visualize.R | 35 +++++++++++++++++++++---------- man/create_protein_boxplot.Rd | 39 +++++++++++++++++++++++++++++++++++ man/do_elnet.Rd | 6 ++++++ 6 files changed, 82 insertions(+), 20 deletions(-) create mode 100644 man/create_protein_boxplot.Rd diff --git a/DESCRIPTION b/DESCRIPTION index c8b17126..b24d0a18 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,6 @@ Imports: limma, missForest, parsnip, - purrr, readxl, recipes, rlang, diff --git a/NAMESPACE b/NAMESPACE index 669fe4fc..969387ae 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(clean_metadata) export(cluster_data) export(create_corr_heatmap) export(create_dir) +export(create_protein_boxplot) export(do_elnet) export(do_limma) export(do_pca) diff --git a/R/classification_models.R b/R/classification_models.R index 811fb59c..940375c5 100644 --- a/R/classification_models.R +++ b/R/classification_models.R @@ -1,9 +1,5 @@ utils::globalVariables(c("roc_auc", ".config", ".pred_class", ".pred_0", "Scaled_Importance", "Importance", "Variable", "std_err")) -utils::globalVariables(c( - ".config", "test_data", "test_set", ".pred_class", ".pred_0", - "Scaled_Importance", "Importance", "Variable", "std_err" -)) #' Split data into training and test sets #' #' This function splits the data into training and test sets based on user defined ratio. @@ -556,6 +552,8 @@ plot_var_imp <- function (finalfit_res, #' @param palette (character or vector). The color palette for the plot. If it is a character, it should be one of the palettes from get_hpa_palettes(). Default is NULL. #' @param vline (logical). Whether to add a vertical line at 50% importance. Default is TRUE. #' @param subtitle (vector). 
Vector of subtitles to include in the plot. Default is a list with all. +#' @param nfeatures (numeric). Number of top features to include in the boxplot. Default is 9. +#' @param points (logical). Whether to add points to the boxplot. Default is TRUE. #' @param seed (numeric). Seed for reproducibility. Default is 123. #' #' @return A list with results for each disease. The list contains: @@ -662,12 +660,18 @@ do_elnet <- function(olink_data, vline = vline, subtitle) + top_features <- var_imp_res$features |> + dplyr::arrange(dplyr::desc(Scaled_Importance)) |> + dplyr::select(Variable) |> + dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> + utils::head(nfeatures) + proteins <- top_features[['Variable']] + boxplot_res <- create_protein_boxplot(join_data, + proteins, disease, - features = var_imp_res$features, - nfeatures = nfeatures, - palette = palette, - points = points) + points, + palette) return(list("hypopt_res" = hypopt_res, "finalfit_res" = finalfit_res, diff --git a/R/visualize.R b/R/visualize.R index 8d454f5a..a49ecedc 100644 --- a/R/visualize.R +++ b/R/visualize.R @@ -1,7 +1,27 @@ +utils::globalVariables(c("Value")) +#' Create boxplots for proteins +#' +#' This function creates a boxplot for the top n proteins in the dataset. +#' It annotates the boxplot with color for the selected disease. +#' It is also possible to add points to the boxplot. +#' +#' @param join_data (tibble). The dataset with the wide Olink data joined with the metadata. +#' @param proteins (vector). The proteins to include in the boxplot. +#' @param disease (character). The disease to annotate. +#' @param points (logical). Whether to add points to the boxplot. +#' @param palette (character). The color palette to use. Default is red3. +#' +#' @return boxplot_panel (plot). The boxplot panel with the selected proteins. 
+#' @export +#' +#' @examples +#' wide_data <- widen_data(example_data, FALSE) +#' join_data <- wide_data |> +#' dplyr::left_join(example_metadata |> dplyr::select(DAid, Disease, Sex)) +#' create_protein_boxplot(join_data, c("A1BG", "A2M"), "AML", palette = "cancers12") create_protein_boxplot <- function(join_data, - features, + proteins, disease, - nfeatures = 9, points = T, palette = NULL) { @@ -16,18 +36,11 @@ create_protein_boxplot <- function(join_data, pal <- "red3" } - top_features <- features |> - dplyr::arrange(desc(Scaled_Importance)) |> - dplyr::select(Variable) |> - dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> - head(nfeatures) - top_features <- top_features[['Variable']] - long_data <- join_data |> - dplyr::select(Disease, dplyr::all_of(top_features)) |> + dplyr::select(Disease, dplyr::all_of(proteins)) |> tidyr::pivot_longer(cols = !Disease, names_to = "Protein", values_to = "Value") - long_data$Protein <- factor(long_data$Protein, levels = top_features, labels = top_features) + long_data$Protein <- factor(long_data$Protein, levels = proteins, labels = proteins) # Create boxplot boxplot <- long_data |> diff --git a/man/create_protein_boxplot.Rd b/man/create_protein_boxplot.Rd new file mode 100644 index 00000000..dc10593f --- /dev/null +++ b/man/create_protein_boxplot.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visualize.R +\name{create_protein_boxplot} +\alias{create_protein_boxplot} +\title{Create boxplots for proteins} +\usage{ +create_protein_boxplot( + join_data, + proteins, + disease, + points = T, + palette = NULL +) +} +\arguments{ +\item{join_data}{(tibble). The dataset with the wide Olink data joined with the metadata.} + +\item{proteins}{(vector). The proteins to include in the boxplot.} + +\item{disease}{(character). The disease to annotate.} + +\item{points}{(logical). Whether to add points to the boxplot.} + +\item{palette}{(character). The color palette to use. 
Default is red3.} +} +\value{ +boxplot_panel (plot). The boxplot panel with the selected proteins. +} +\description{ +This function creates a boxplot for the top n proteins in the dataset. +It annotates the boxplot with color for the selected disease. +It is also possible to add points to the boxplot. +} +\examples{ +wide_data <- widen_data(example_data, FALSE) +join_data <- wide_data |> + dplyr::left_join(example_metadata |> dplyr::select(DAid, Disease, Sex)) +create_protein_boxplot(join_data, c("A1BG", "A2M"), "AML", palette = "cancers12") +} diff --git a/man/do_elnet.Rd b/man/do_elnet.Rd index add059af..d9a24c97 100644 --- a/man/do_elnet.Rd +++ b/man/do_elnet.Rd @@ -22,6 +22,8 @@ do_elnet( vline = T, subtitle = c("accuracy", "sensitivity", "specificity", "auc", "features", "top-features", "mixture"), + nfeatures = 9, + points = T, seed = 123 ) } @@ -58,6 +60,10 @@ do_elnet( \item{subtitle}{(vector). Vector of subtitles to include in the plot. Default is a list with all.} +\item{nfeatures}{(numeric). Number of top features to include in the boxplot. Default is 9.} + +\item{points}{(logical). Whether to add points to the boxplot. Default is TRUE.} + \item{seed}{(numeric). Seed for reproducibility. Default is 123.} } \value{