Tally Experiment Sets

Abstract

Tally Experiment sets. brentlabRnaSeqTools package version: 0.0.0.0

Introduction

The purpose of tallying the experiment sets is to track the development of a given set. These are meant as examples – if you are the analyst consuming a given data set, it is a good idea to figure out how to tally your set to see how close it is to being done.

For each set (eg, the Environmental Perturbation), if you extract the code and paste it into a clean notebook, a somewhat formatted html document will be created. You can publish this to Rpubs, a free server for rendered Rmd notebooks, which is a nice way of sharing the set progress as well as a method of tracking progress over time, as the rendered results are saved on the Rpubs server.

Setup

library(brentlabRnaSeqTools)
library(rtracklayer)
library(tidyverse)

# set variables 

KN99_GFF_RDS = Sys.getenv("kn99_stranded_gff_rds")
DB_USERNAME = Sys.getenv("db_username")
DB_PASSWORD = Sys.getenv("db_password")

# note: I mount to the cluster and output directly to it
DDS_OUTPUT_DIR = "."

# controls whether dds objects are written
WRITE_OUT = FALSE

pull the database as a brentlabRnaSeqSet object

Note: you really only need the metadata for this task. You could use getMetadata.

blrs = brentlabRnaSeqSetFromDatabase('kn99',DB_USERNAME, DB_PASSWORD)

Add gene level data (optional)

this adds all of the data regarding each locus as a GRange object to the gene data slot of the brentlabRnaSeqSet object. Useful if you are going to use other Bioconductor packages.

kn99_gff = readRDS(KN99_GFF_RDS)

kn99_genes = kn99_gff[kn99_gff$ID %in% rownames(blrs)]

rowRanges(blrs) = kn99_genes[order(match(kn99_genes$ID,rownames(blrs)))]

rownames(blrs) = rowData(blrs)$ID

Environmental Perturbation

Create Sets

ep_list = list(
  wt = createExperimentSet(blrs, 'envPert_epWT'),
  titr = createExperimentSet(blrs, 'envPert_titrationWT'),
  pert = createExperimentSet(blrs, 'envPert_perturbed')
)

# NOTE!! AS OF 20220201 CNAG_03894 forms a linear depdendence with both 
# concat treatment and libraryDate columns. it is being removed here to solve
# that issue

ep_list$pert = ep_list$pert[,ep_list$pert$genotype1 != "CNAG_03894"]


ep_list_qc_passing = map(ep_list,qaFilter)

ep_list_qc_passing_with_iqr = map(names(ep_list), 
                         ~qaFilter(ep_list[[.]],1,paste0("ep_",., "_iqr")))
names(ep_list_qc_passing_with_iqr) = names(ep_list)

condition_lists = list(
  wt = alist(medium,
             atmosphere,
             temperature, 
             timePoint, 
             treatment, 
             treatmentConc, 
             pH),
  titr = alist(medium,
               atmosphere,
               temperature, 
               timePoint, 
               treatment, 
               treatmentConc, 
               pH),
  pert = alist(genotype1,
               medium,
               atmosphere,
               temperature, 
               timePoint, 
               treatment, 
               treatmentConc, 
               pH))

ep_tally_list = map2(names(ep_list), condition_lists, 
                     ~createEPTally(ep_list[[.x]], 
                                    ep_list_qc_passing[[.x]], 
                                    ep_list_qc_passing_with_iqr[[.x]],
                                    .y))
names(ep_tally_list) = names(ep_list)


names(ep_list_qc_passing_with_iqr) = names(ep_list)

# write_csv(env_pert_tally, 
#           '../../../datafreeze_202111/data/env_pert_tally_20211122.csv')
# 
# # add those samples with less than 2 replicates
# iqr_fltr_rle_summary_mod = removed_effect_rle_summary %>% 
#   filter(INTERQUARTILE_RANGE<iqr_threshold | is.na(INTERQUARTILE_RANGE))
# 
# write_csv(iqr_fltr_rle_summary_mod, 
#           '../../../datafreeze_202111/data/iqr_fltr_rle_summary_20211122.csv')

ep_wt tallies

tally_type_list = c('unfiltered_tally',
                    'qc1_passing_tally',
                    'iqr_filter_qc1_passing_tally')

ep_tally = map(ep_tally_list, ~map(tally_type_list, reshapeEnvPertTallies, .))

ep_tally = map(ep_tally, setNames, tally_type_list)

RLE results

Norm Counts Iqr

column 3

# note that this function requires that ep_list and ep_list_qc_passing_with_iqr
# be in the namespace already (see the setup section)
# TODO: save the norm count rle from the sample agreement step for this function
cumDistIqr = function(set_name){
  plot(ggplot() + 
       stat_ecdf(data = norm_count_rle), 
                 aes(!!rlang::sym(paste0("ep_",set_name, "_iqr"))), 
                 color = 'orange') + 
       stat_ecdf(data = extractColData(ep_list_qc_passing_with_iqr[[set_name]]), 
                 aes(!!rlang::sym(paste0("ep_", set_name, "_iqr"))), 
                 color = "blue") + 
       ggtitle("orange = normalized counts; blue = libdate_model_removed_libdate")) +
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,.05))+
  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,.05))
}

Environmental Perturbation – WT

Column

unfiltered tallies

ep_tally$wt$unfiltered_tally %>%
  DT::datatable(
    options = list(
    pageLength=50, 
    scrollX='400px', 
    fixedColumns = list(leftColumns = 2),
    autoWidth = TRUE)
)

qc1 passing

ep_tally$wt$qc1_passing_tally %>%
  DT::datatable(
    options = list(
    pageLength=50, 
    scrollX='400px', 
    fixedColumns = list(leftColumns = 2),
    autoWidth = TRUE)
)

iqr filtered

ep_tally$wt$iqr_filter_qc1_passing_tally %>%
  DT::datatable(
    options = list(
    pageLength=50, 
    scrollX='400px', 
    fixedColumns = list(leftColumns = 2),
    autoWidth = TRUE)
)

Environmental Perturbation – Perturbed

Column

unfiltered tallies

ep_tally$pert$unfiltered_tally %>%
  DT::datatable(
    options = list(
    pageLength=50, 
    scrollX='400px', 
    fixedColumns = list(leftColumns = 2),
    autoWidth = TRUE)
)

qc1 passing

ep_tally$pert$qc1_passing_tally %>%
  DT::datatable(
    options = list(
    pageLength=50, 
    scrollX='400px', 
    fixedColumns = list(leftColumns = 2),
    autoWidth = TRUE)
)

iqr filtered

ep_tally$pert$iqr_filter_qc1_passing_tally  %>%
  DT::datatable(
    options = list(
    pageLength=50, 
    scrollX='400px', 
    fixedColumns = list(leftColumns = 2),
    autoWidth = TRUE)
)

Titration Tallies – WT

Column

unfiltered tallies

ep_tally$titr$unfiltered_tally %>%
  DT::datatable(
    options = list(
    pageLength=50, 
    scrollX='400px', 
    fixedColumns = list(leftColumns = 2),
    autoWidth = TRUE)
)

qc1 passing

ep_tally$titr$qc1_passing_tally %>%
  DT::datatable(
    options = list(
    pageLength=50, 
    scrollX='400px', 
    fixedColumns = list(leftColumns = 2),
    autoWidth = TRUE)
)

iqr filter

ep_tally$titr$iqr_filter_qc1_passing_tally %>%
  DT::datatable(
    options = list(
    pageLength=50, 
    scrollX='400px', 
    fixedColumns = list(leftColumns = 2),
    autoWidth = TRUE)
)

Chase Mateusiak

08/04/2022

Introduction

Setup

pull the database as a brentlabRnaSeqSet object

Add gene level data (optional)

Environmental Perturbation

Create Sets

ep_wt tallies

RLE results

Norm Counts Iqr

column 3

Environmental Perturbation – WT

Column

unfiltered tallies

qc1 passing

iqr filtered

Environmental Perturbation – Perturbed

Column

unfiltered tallies

qc1 passing

iqr filtered

Titration Tallies – WT

Column

unfiltered tallies

qc1 passing

iqr filter