# Spliceosome_script.R # Manuscript: # Forrest, A. R., Kawaji, H., Rehli, M., Baillie, J. K., De Hoon, M. J., Haberle, V., ... & # Andersson, R. (2014). A promoter-level mammalian expression atlas. Nature, 507(7493), 462-470. # Created by Ruby Sharma # Modified by Sajal Kumar # Copyright (c) NMSU Song lab # A script to build co-expression and differential co-expression networks for: # Cerebellum versus Liver utilzing the TSS found on the Spliceosome pathway # The genes selected for this study were obtained from the differential interactions # in F5_Cb_Liv_script. The details on how the genes and pathways were selected are available # in the manuscript. # Here we begin with normalized matrices for Cage Mouse datasets, containing 112 TSS # across developing cerebellum and liver with outliers removed. The pre-processing steps # can be found in the manuscript. setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) require(stringr) require(DiffXTablesCoexpnet) nthreads = 10 # prepare data for co-expression and differential co-expression experiments # nthreads: number of parallel threads to use. PrepareData = function(nthreads){ # load Cage profiles in mouse cerebellum and liver # with genes on columns and samples on rows load(paste0("../Data/F5CbSplTss.RData")) load(paste0("../Data/F5LivSplTss.RData")) # get gene names tss_names = colnames(CbSplTss) # filter genes with low MAD rem_tss = FilterGenesByMAD(rbind(LivSplTss,CbSplTss), nthreads) # remove Tss that have low MAD LivSplTss = LivSplTss[ , !rem_tss] CbSplTss = CbSplTss[ , !rem_tss] tss_names = tss_names[!rem_tss] # remove genes with 0 variance in either experiment LivSplTssvar = apply(LivSplTss, 2, var) CbSplTssvar = apply(CbSplTss, 2, var) zero_var_tss = LivSplTssvar == 0 | CbSplTssvar == 0 LivSplTss = LivSplTss[ , !zero_var_tss] CbSplTss = CbSplTss[ , !zero_var_tss] tss_names = tss_names[!zero_var_tss] # return the four datasets return(list(LivSplTss = LivSplTss, CbSplTss = CbSplTss, tss_names = tss_names)) } F5Spliceosome_script = function(nthreads){ # prepare data prep_data = PrepareData(nthreads) LivSplTss = prep_data$LivSplTss CbSplTss = prep_data$CbSplTss tss_names = prep_data$tss_names # number of conditions n_conditions = 2 # get co-expression networks # Cerebellum coexpnet cb_coexpnet = GetCoexpnet(parents = CbSplTss, children = CbSplTss, cnames = tss_names, pnames = tss_names, nthreads = nthreads) # Liver coexpnet liv_coexpnet = GetCoexpnet(parents = LivSplTss, children = LivSplTss, cnames = tss_names, pnames = tss_names, nthreads = nthreads) # get differential coexpression networks # get cerebellum liver diffcoexpnet cb_liv_diffcoexpnet = Exp1_vs_Exp2(CbSplTss, tss_names, LivSplTss, nthreads, list(cb_coexpnet, liv_coexpnet)) # threshold effect size # need highest dimension max_sm = nrow(CbSplTss) + nrow(LivSplTss) d = max(2, ceiling((sqrt(max_sm/5)/n_conditions))) # generate distribution sim_shsong_res = SharmaSongEsDist(N=c(100,100), n_conditions = 2, nthreads = nthreads, d = d) # threshold at 60% es_thres = sort(sim_shsong_res[,4])[round(0.6*length(sim_shsong_res[,4]))] # compile percentages Compileresults(cb_liv_diffcoexpnet, 0.05, es_thres) } Compileresults = function(cb_liv, pthres, esthres){ cat("Developing Murine Cerebellum vs Liver in Spliceosome: ", sum(cb_liv$PVALUE < pthres & cb_liv$ESTIMATE > esthres)/nrow(cb_liv), "\n") } GetCoexpnet = function(parents, children, cnames, pnames, nthreads){ # build co-expression networks coexpnet = Build_Coexpnet(parent_expr = parents, child_expr = children, c_names = cnames, p_names = pnames, method = 'univariate', nthreads = nthreads) # filter results filter = coexpnet$PVALUE < 0.1 & coexpnet$ESTIMATE > 0.8 coexpnet = coexpnet[filter, ] gc() # return return(coexpnet) } Exp1_vs_Exp2 = function(data1, gnames, data2, nthreads, coexpnet_list){ # get interaction indices inter_indices = GetInteractionIndices(coexpnet_list) # prepare data exp_matr = rbind(data1, data2) conditions = c(rep(1, nrow(data1)), rep(2, nrow(data2))) n_conditions = 2 # build differential coexpnet diffcoexpnet = Build_DiffCoexpnet(exp_matr = exp_matr, n_conditions = n_conditions, conditions = conditions, indices = inter_indices, g_names = gnames, method = 'univariate', nthreads = nthreads) # return differential coexpression network return(diffcoexpnet) }