# F5_Cb_Liv_script.R # Manuscript: # Forrest, A. R., Kawaji, H., Rehli, M., Baillie, J. K., De Hoon, M. J., Haberle, V., ... & # Andersson, R. (2014). A promoter-level mammalian expression atlas. Nature, 507(7493), 462-470. # De Rie, D., Abugessaisa, I., Alam, T., Arner, E., Arner, P., Ashoor, H., ... & Carlisle, A. J. (2017). # An integrated expression atlas of miRNAs and their promoters in human and mouse. # Nature biotechnology, 35(9), 872-878. # Created by Ruby Sharma # Modified by Sajal Kumar # Copyright (c) NMSU Song lab # A script to build co-expression and differential co-expression networks for: # Cerebellum versus Liver, where the parents are miRNAs and children are CAGE # Here we begin with normalized matrices for Cage and miRNA Mouse datasets, containing 641 miRNAs # and 22,637 Cage profiles across developing cerebellum and liver with outliers removed. # The pre-processing steps can be found in the manuscript. setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) require(stringr) require(DiffXTablesCoexpnet) nthreads = 10 # prepare data for co-expression and differential co-expression experiments # nthreads: number of parallel threads to use. PrepareData = function(nthreads){ # load miRNA and Cage profiles in mouse cerebellum and liver # with genes on columns and samples on rows load(paste0("../Data/F5CbMiRnaTss.RData")) load(paste0("../Data/F5LivMiRnaTss.RData")) # get gene names mirna_names = paste0("miRNA_",colnames(LivMiRna)) tss_names = paste0("TSS_",colnames(LivTss)) # filter genes with low MAD rem_mirna = FilterGenesByMAD(rbind(LivMiRna,CbMiRna), nthreads) rem_tss = FilterGenesByMAD(rbind(LivTss,CbTss), nthreads) # remove Tss/Mirna that have low MAD LivMiRna = LivMiRna[ , !rem_mirna] CbMiRna = CbMiRna[ , !rem_mirna] mirna_names = mirna_names[!rem_mirna] LivTss = LivTss[ , !rem_tss] CbTss = CbTss[ , !rem_tss] tss_names = tss_names[!rem_tss] # remove genes with 0 variance in either experiment LivTssvar = apply(LivTss, 2, var) LivMiRnavar = apply(LivMiRna, 2, var) CbTssvar = apply(CbTss, 2, var) CbMiRnavar = apply(CbMiRna, 2, var) zero_var_tss = LivTssvar == 0 | CbTssvar == 0 zero_var_mirna = LivMiRnavar == 0 | CbMiRnavar == 0 LivTss = LivTss[ , !zero_var_tss] CbTss = CbTss[ , !zero_var_tss] tss_names = tss_names[!zero_var_tss] LivMiRna = LivMiRna[ , !zero_var_mirna] CbMiRna = CbMiRna[ , !zero_var_mirna] mirna_names = mirna_names[!zero_var_mirna] # return the four datasets return(list(LivTss = LivTss, CbTss = CbTss, LivMiRna = LivMiRna, CbMiRna = CbMiRna, tss_names = tss_names, mirna_names = mirna_names)) } F5_Cb_Liv_script = function(nthreads){ # prepare data prep_data = PrepareData(nthreads) LivTss = prep_data$LivTss CbTss = prep_data$CbTss LivMiRna = prep_data$LivMiRna CbMiRna = prep_data$CbMiRna tss_names = prep_data$tss_names mirna_names = prep_data$mirna_names # number of conditions n_conditions = 2 # get co-expression networks # Cerebellum coexpnet cb_coexpnet = GetCoexpnet(parents = CbMiRna, children = CbTss, cnames = tss_names, pnames = mirna_names, nthreads = nthreads) # Liver coexpnet liv_coexpnet = GetCoexpnet(parents = LivMiRna, children = LivTss, cnames = tss_names, pnames = mirna_names, nthreads = nthreads) # adjust child indices cb_coexpnet$CID = cb_coexpnet$CID + length(mirna_names) liv_coexpnet$CID = liv_coexpnet$CID + length(mirna_names) # get differential coexpression networks # get cerebellum liver diffcoexpnet cb_liv_diffcoexpnet = Exp1_vs_Exp2(cbind(CbMiRna, CbTss), c(mirna_names, tss_names), cbind(LivMiRna, LivTss), nthreads, list(cb_coexpnet, liv_coexpnet)) # threshold effect size # need highest dimension max_sm = nrow(CbMiRna) + nrow(LivMiRna) d = max(2, ceiling((sqrt(max_sm/5)/n_conditions))) # generate distribution sim_shsong_res = SharmaSongEsDist(N=c(100,100), n_conditions = 2, nthreads = nthreads, d = d) # threshold at 60% es_thres = sort(sim_shsong_res[,4])[round(0.6*length(sim_shsong_res[,4]))] # compile percentages Compileresults(cb_liv_diffcoexpnet, 0.05, es_thres) } Compileresults = function(cb_liv, pthres, esthres){ cat("Developing Murine Cerebellum vs Liver: ", sum(cb_liv$PVALUE < pthres & cb_liv$ESTIMATE > esthres)/nrow(cb_liv), "\n") } GetCoexpnet = function(parents, children, cnames, pnames, nthreads){ # build co-expression networks coexpnet = Build_Coexpnet(parent_expr = parents, child_expr = children, c_names = cnames, p_names = pnames, method = 'univariate', nthreads = nthreads) # filter results filter = coexpnet$PVALUE < 0.1 & coexpnet$ESTIMATE > 0.8 coexpnet = coexpnet[filter, ] gc() # return return(coexpnet) } Exp1_vs_Exp2 = function(data1, gnames, data2, nthreads, coexpnet_list){ # get interaction indices inter_indices = GetInteractionIndices(coexpnet_list) # prepare data exp_matr = rbind(data1, data2) conditions = c(rep(1, nrow(data1)), rep(2, nrow(data2))) n_conditions = 2 # build differential coexpnet diffcoexpnet = Build_DiffCoexpnet(exp_matr = exp_matr, n_conditions = n_conditions, conditions = conditions, indices = inter_indices, g_names = gnames, method = 'univariate', nthreads = nthreads) # return differential coexpression network return(diffcoexpnet) }