Environmental Perturbation Analysis Data Creation

Abstract

Create a data object ready for analysis for the Environmental Perturbation experiment set. brentlabRnaSeqTools package version: 0.0.0.0

BrentlabRnaSeqSet

The BrentlabRnaSeqSet is a child of the DESeqDataSet, which is a child of SummarizedExperiment.

Therefore, the brentlabRnaSeqSet has all of the DESeq functionality – eg, DESeq size factor normalization, or the DESeq() function – as well as all of the SummarizedExperiment methods, and some more ‘custom’ methods more suited to our purposes. It is easily extensible – for those more computationally minded users, it would be a good idea to learn and use the object oriented programming tools. There are many cases in which doing so will make your code less error prone, easier to test, more reproducible, and easier to maintain long term.

Setup

library(brentlabRnaSeqTools)
library(rtracklayer)
library(tidyverse)

# set variables 

KN99_GFF_RDS = Sys.getenv("kn99_stranded_gff_rds")
DB_USERNAME = Sys.getenv("db_username")
DB_PASSWORD = Sys.getenv("db_password")

# note: I mount to the cluster and output directly to it
DDS_OUTPUT_DIR = "."

# controls whether dds objects are written
WRITE_OUT = FALSE

pull the database as a brentlabRnaSeqSet object

blrs = brentlabRnaSeqSetFromDatabase('kn99',DB_USERNAME, DB_PASSWORD)

Add gene level data (optional)

this adds all of the data regarding each locus as a GRange object to the gene data slot of the brentlabRnaSeqSet object. Useful if you are going to use other Bioconductor packages.

kn99_gff = readRDS(KN99_GFF_RDS)

kn99_genes = kn99_gff[kn99_gff$ID %in% rownames(blrs)]

rowRanges(blrs) = kn99_genes[order(match(kn99_genes$ID,rownames(blrs)))]

rownames(blrs) = rowData(blrs)$ID

Create Sets

If the experiment set you want is not already created, issue an ‘issue report’, and describe the EXACT filter that you want to use to create the set. What goes into your set depends on what you describe as your filter, so spend time with the database to figure out what is there.

ep_list = list(
  wt = createExperimentSet(blrs, 'envPert_epWT'),
  titr = createExperimentSet(blrs, 'envPert_titrationWT'),
  pert = createExperimentSet(blrs, 'envPert_perturbed')
)

# NOTE!! AS OF 20220201 CNAG_03894 forms a linear depdendence with both 
# concat treatment and libraryDate columns. it is being removed here to solve
# that issue

ep_list$pert = ep_list$pert[,ep_list$pert$genotype1 != "CNAG_03894"]

Quality Filter


ep_list_qc_passing = map(names(ep_list), 
                         ~qaFilter(ep_list[[.]],1,paste0("ep_",., "_iqr")))

Expression Filter

# How this is done is up to you, and obviously affects what genes are left in.
# Below is an example. You need to think about the thresholds and filter method 
# that suits your purpose best.

expr_fltr_list = map(ep_list, ~rowSums(edgeR::cpm(counts(.))>4) >= 4)

ep_list_qc_passing_fltr = map2(ep_list_qc_passing, expr_fltr_list, ~.x[.y,])

Set Design

setBaseConcatTreatmentBaseCond = function(ep_set, concat_base_cond){
  colData(ep_set)$concat_treatment = 
    relevel(colData(ep_set)$concat_treatment, ref = concat_base_cond)
  
  colData(ep_set) = droplevels(DataFrame(colData(ep_set)))
  
  ep_set
  
}

setEpDesign = function(ep_set, design){
  design(ep_set) = design
  
  ep_set
}

concat_base_cond_list = list(
  
  wt = "YPD_noAtmosphere_30_noTreatment_noTreatmentConc_noPH_30",
  titr = "RPMI_noAtmosphere_30_noTreatment_noTreatmentConc_noPH_30",
  pert = 'PBS_noAtmosphere_30_noTreatment_noTreatmentConc_noPH_0'
)

ep_designs = list(
  wt = formula(~libraryDate + concat_treatment),
  titr = formula(~libraryDate + concat_treatment),
  pert = formula(~libraryDate + concat_treatment + genotype1)
)

ep_list_qc_passing_fltr = map2(ep_list_qc_passing_fltr, 
                               concat_base_cond_list, 
                               setBaseConcatTreatmentBaseCond)

ep_list_qc_passing_fltr = map2(ep_list_qc_passing_fltr, 
                               ep_designs, 
                               setEpDesign)

Coerce back to DeseqDataObjects for proessing

ep_dds_list = map(ep_list_qc_passing_fltr, coerceToDds)
names(ep_dds_list) = names(ep_list)

DDS_OUTPUT_DIR = "/mnt/scratch/rnaseq_pipeline/experiments/epDatafreeze"

WRITE_OUT = TRUE

if(WRITE_OUT){
  today = format(lubridate::today(),"%Y%m%d")

  output_dir = file.path(DDS_OUTPUT_DIR, today)

  dir.create(output_dir, recursive=TRUE)
  
  map(names(ep_dds_list), ~write_rds(ep_dds_list[[.]],
                                     file.path(output_dir,
                                               paste0("ep_",.,".rds"))))
}

Chase Mateusiak

08/04/2022