14  Single-Sample Gene Set Enrichment Analysis

Single-Sample Gene Set Enrichment Analysis

pkgs <- c("fs", "futile.logger", "configr", "stringr", "ggpubr", "ggthemes",
          "jhtools", "glue", "ggsci", "patchwork", "tidyverse", "dplyr")
for (pkg in pkgs){
  suppressPackageStartupMessages(library(pkg, character.only = T))
}
project <- "mm"
dataset <- "meta"
species <- "human"
workdir <- glue("~/projects/{project}/analysis/{dataset}/{species}")
setwd(workdir)

fn_xls <- "~/projects/mm/docs/meta/genesets_subtypes.xlsx"
genesets1 <- readxl::read_xlsx(fn_xls, sheet = 1)
genesets2 <- readxl::read_xlsx(fn_xls, sheet = 2)
genesets3 <- readxl::read_xlsx(fn_xls, sheet = 3)

genesets1_list <- lapply(unique(genesets1$sub_types), function(i){
  genesets1 |> dplyr::filter(sub_types == {{i}}) |> pull(gene_name) |> unique()
})
names(genesets1_list) <- paste0("Zhan_et_al_", unique(genesets1$sub_types))

genesets2_list <- lapply(unique(genesets2$sub_types), function(i){
  genesets2 |> dplyr::filter(sub_types == {{i}}) |> pull(gene_name) |> unique()
})
names(genesets2_list) <- paste0("Broyl_et_al_", unique(genesets2$sub_types))
genesets3 <- list(GEP70 = unique(genesets3$gene_name))
exp_fn <- "~/projects/mm/analysis/meta/human/rnaseq/exp/mm1199.rds"
dat_exp <- read_rds(exp_fn) |> as.matrix()
ssgseapar <- GSVA::ssgseaParam(exprData = dat_exp, geneSets = c(genesets1_list, genesets2_list, genesets3))
ssgsea_matrix <- GSVA::gsva(ssgseapar) |> t() |> as.data.frame()
colnames(ssgsea_matrix) <- paste0("ssgsea",colnames(ssgsea_matrix))
ssgsea_matrix <- ssgsea_matrix |> rownames_to_column(var = "sample_id") |> as_tibble()


gsvapar <- GSVA::gsvaParam(exprData = dat_exp, geneSets = c(genesets1_list, genesets2_list, genesets3), minSize=1)
score_matrix <- GSVA::gsva(gsvapar) |> t() |> as.data.frame()
#colnames(score_matrix) <- paste0("gsva",colnames(score_matrix))
score_matrix <- score_matrix |> rownames_to_column(var = "sample_id") |> as_tibble()
gsva_score <- left_join(score_matrix, ssgsea_matrix, by = join_by(sample_id == sample_id))
fn <- "~/projects/mm/docs/meta/sampleinfo/gsva_score.rds"
write_rds(gsva_score, fn)
head(gsva_score[,1:5])
colnames(gsva_score)

fn_info <- "~/projects/mm/docs/meta/sampleinfo/sampleinfo_jilin_commpass.rds"
dat_raw <- read_rds(fn_info)
dat <- dat_raw |> dplyr::select( - contains("_et_al_"))

dat_out <- left_join(dat, score_matrix, by = join_by(sample_id == sample_id))
write_rds(dat_out, fn_info)
colnames(dat_out)