Plotting FABP4 vs ADH1B for Microarray Datasets
========================================================

by Keith Baggerly
*Jun 23, 2013*

```{r options, echo=FALSE}
## Namespace-qualified so the report also knits when knitr is not attached
## (e.g. under rmarkdown::render).
knitr::opts_chunk$set(tidy=TRUE, message=FALSE)
```

## 1 Executive Summary

### 1.1 Introduction

We want to examine the joint distribution of FABP4 and ADH1B
in the TCGA, Tothill, Bonome, and CCLE cohorts, and to assess
how much RD rates increase with expression levels.

### 1.2 Data and Methods

We use the array and clinical data prepared in the various
"assembleData" and "assembleClinical" scripts.

We produce plots for each dataset, using high/low cutoffs
assessed by eye.

We measure the RD rates when both genes are high for the TCGA
and Tothill cohorts.

### 1.3 Results

We produce figures named fabp4VsAdh1b(dataset).

The TCGA RD rates are 97/107 in the high expression group,
vs 281/384 in the complement.

The Tothill RD rates are 59/63 in the high expression group,
vs 80/126 in the complement.

## 2. Plotting FABP4 vs ADH1B for TCGA

```{r loadTCGA}
load(file.path("RDataObjects", "tcgaExpression.RData"))
load(file.path("RDataObjects", "tcgaFilteredSamples.RData"))
load(file.path("RDataObjects", "tcgaClinical.RData"))
```

The same scatter plot is drawn for every dataset, both on screen
and as a PDF, so we define it once here.

```{r defineScatterHelper}
## Draw the FABP4 (x axis) vs ADH1B (y axis) scatter plot for one dataset.
##
## fabp4, adh1b       expression values of the two probe sets, one per sample
## bg                 fill color of each plotted sample
## main               plot title
## fabp4Cut, adh1bCut cutoffs drawn as the vertical and horizontal lines
## legendPos          legend position keyword, e.g. "topleft"
## legendLabels       legend entries
## legendColors       fill color of each legend entry
## legendTitle        legend title
## pdfFile            if not NULL, the figure is written to this PDF file
##                    instead of being drawn on the current device
##
## Called for its side effect; returns NULL invisibly.
plotFabp4VsAdh1b <- function(fabp4, adh1b, bg, main, fabp4Cut, adh1bCut,
                             legendPos, legendLabels, legendColors,
                             legendTitle, pdfFile = NULL) {
  if (!is.null(pdfFile)) {
    ## pdf() cannot open a file in a directory that does not exist yet.
    dir.create(dirname(pdfFile), showWarnings = FALSE, recursive = TRUE)
    pdf(file = pdfFile)
    ## Close the PDF device even if one of the plotting calls fails.
    on.exit(dev.off(), add = TRUE)
  }
  plot(fabp4, adh1b, pch = 21, bg = bg,
       xlab = "FABP4 Expression (203980_at)",
       ylab = "ADH1B Expression (209612_s_at)",
       main = main)
  abline(v = fabp4Cut, h = adh1bCut)
  ## pch = 21 with pt.bg, so the legend symbols match the plotted points.
  legend(legendPos, legendLabels, pch = 21, pt.bg = legendColors,
         bty = "n", title = legendTitle)
  invisible(NULL)
}
```

```{r plotFABP4vsADH1BforTCGA}
fabp4.ps <- "203980_at"
adh1b.ps <- "209612_s_at"
## Cutoffs (assessed by eye) are named once and reused for the plot and tables.
fabp4Cut <- 3.5
adh1bCut <- 3.5
bgColors <- rep("grey", length(tcgaSampleRD))
bgColors[tcgaSampleRD == "RD"] <- "red"
tcgaUsed <- tcgaFilteredSamples[, "sampleUse"] == "Used"
## One definition serves both the on-screen figure and the PDF copy.
drawScatter <- function(pdfFile = NULL) {
  plotFabp4VsAdh1b(tcgaExpression[fabp4.ps, tcgaUsed],
                   tcgaExpression[adh1b.ps, tcgaUsed],
                   bg = bgColors[tcgaUsed],
                   main = "FABP4 and ADH1B in TCGA Ovarian Samples",
                   fabp4Cut = fabp4Cut, adh1bCut = adh1bCut,
                   legendPos = "topleft", legendLabels = c("Yes", "No"),
                   legendColors = c("red", "grey"),
                   legendTitle = "RD Status", pdfFile = pdfFile)
}
drawScatter()
```

```{r plotFABP4vsADH1BforTCGAPdf, echo=FALSE}
drawScatter(file.path("Figures", "fabp4VsAdh1bTCGA.pdf"))
```

Now we check the RD to No RD ratios by subgroup.

```{r checkStatusTCGA}
table(tcgaSampleRD[tcgaUsed],
      tcgaExpression[fabp4.ps, tcgaUsed] > fabp4Cut)
table(tcgaSampleRD[tcgaUsed],
      tcgaExpression[adh1b.ps, tcgaUsed] > adh1bCut)
table(tcgaSampleRD[tcgaUsed],
      (tcgaExpression[fabp4.ps, tcgaUsed] > fabp4Cut) &
      (tcgaExpression[adh1b.ps, tcgaUsed] > adh1bCut))
```

When both gene levels are high (above 3.5), the RD rate is
97/107 (90.7%), as opposed to 281/384 (73.2%).

```{r cleanupFigTCGA}
rm(tcgaExpression, tcgaFilteredSamples, tcgaSampleInfo,
   fabp4.ps, adh1b.ps, fabp4Cut, adh1bCut, bgColors, drawScatter,
   tcgaDataDirs, tcgaFiles, tcgaSampleClinicalMapping,
   tcgaSampleRD, tcgaUsed)
```

## 3. Plotting FABP4 vs ADH1B for Tothill

```{r loadTothill}
load(file.path("RDataObjects", "tothillExpression.RData"))
load(file.path("RDataObjects", "tothillFilteredSamples.RData"))
load(file.path("RDataObjects", "tothillClinical.RData"))
tothillExpression <- tothillExpression[, rownames(tothillFilteredSamples)]
## Fail loudly, rather than just printing FALSE, if the RD calls are not in
## the same sample order as the filtered sample table.
stopifnot(all(rownames(tothillFilteredSamples) == names(tothillRD)))
```

```{r plotFABP4vsADH1BforTothill}
fabp4.ps <- "203980_at"
adh1b.ps <- "209612_s_at"
## Cutoffs (assessed by eye) are named once and reused for the plot and tables.
fabp4Cut <- 5.25
adh1bCut <- 4.25
bgColors <- rep("grey", length(tothillRD))
bgColors[tothillRD == "RD"] <- "red"
tothillUsed <- tothillFilteredSamples[, "sampleUse"] == "Used"
## One definition serves both the on-screen figure and the PDF copy.
drawScatter <- function(pdfFile = NULL) {
  plotFabp4VsAdh1b(tothillExpression[fabp4.ps, tothillUsed],
                   tothillExpression[adh1b.ps, tothillUsed],
                   bg = bgColors[tothillUsed],
                   main = "FABP4 and ADH1B in Tothill Ovarian Samples",
                   fabp4Cut = fabp4Cut, adh1bCut = adh1bCut,
                   legendPos = "topleft", legendLabels = c("Yes", "No"),
                   legendColors = c("red", "grey"),
                   legendTitle = "RD Status", pdfFile = pdfFile)
}
drawScatter()
```

```{r plotFABP4vsADH1BforTothillPdf, echo=FALSE}
drawScatter(file.path("Figures", "fabp4VsAdh1bTothill.pdf"))
```

Now we check the RD to No RD ratios by subgroup.

```{r checkStatusTothill}
table(tothillRD[tothillUsed],
      tothillExpression[fabp4.ps, tothillUsed] > fabp4Cut)
table(tothillRD[tothillUsed],
      tothillExpression[adh1b.ps, tothillUsed] > adh1bCut)
table(tothillRD[tothillUsed],
      (tothillExpression[fabp4.ps, tothillUsed] > fabp4Cut) &
      (tothillExpression[adh1b.ps, tothillUsed] > adh1bCut))
```

When both gene levels are high, the RD rate is
59/63 (93.7%), as opposed to 80/126 (63.5%).

```{r cleanupTothill}
rm(tothillClinical, tothillExpression, tothillFilteredSamples,
   tothillOSMos, tothillPFSMos,
   fabp4.ps, adh1b.ps, fabp4Cut, adh1bCut, bgColors, drawScatter,
   tothillRD, tothillUsed)
```

## 4. Plotting FABP4 vs ADH1B for Bonome

```{r loadBonome}
load(file.path("RDataObjects", "bonomeExpression.RData"))
load(file.path("RDataObjects", "bonomeClinical.RData"))
bonomeExpression <- bonomeExpression[, rownames(bonomeClinical)]
```

```{r plotFABP4vsADH1BforBonome}
fabp4.ps <- "203980_at"
adh1b.ps <- "209612_s_at"
## Cutoffs (assessed by eye) are named once and reused for the plot and tables.
fabp4Cut <- 5.25
adh1bCut <- 4.9
bgColors <- rep("grey", nrow(bonomeClinical))
bgColors[bonomeClinical[, "SurgeryOutcome"] == "Suboptimal"] <- "red"
## omit 10 normal samples
bonomeUsed <- bonomeClinical[, "SurgeryOutcome"] != ""
## One definition serves both the on-screen figure and the PDF copy.
drawScatter <- function(pdfFile = NULL) {
  plotFabp4VsAdh1b(bonomeExpression[fabp4.ps, bonomeUsed],
                   bonomeExpression[adh1b.ps, bonomeUsed],
                   bg = bgColors[bonomeUsed],
                   main = "FABP4 and ADH1B in Bonome Ovarian Samples",
                   fabp4Cut = fabp4Cut, adh1bCut = adh1bCut,
                   legendPos = "topleft",
                   legendLabels = c("Subopt", "Optimal"),
                   legendColors = c("red", "grey"),
                   legendTitle = "Debulking", pdfFile = pdfFile)
}
drawScatter()
```

```{r plotFABP4vsADH1BforBonomePdf, echo=FALSE}
drawScatter(file.path("Figures", "fabp4VsAdh1bBonome.pdf"))
```

Now we check the Optimal to Suboptimal ratios by subgroup.

```{r checkStatusBonome}
table(bonomeClinical[bonomeUsed, "SurgeryOutcome"],
      bonomeExpression[fabp4.ps, bonomeUsed] > fabp4Cut)
table(bonomeClinical[bonomeUsed, "SurgeryOutcome"],
      bonomeExpression[adh1b.ps, bonomeUsed] > adh1bCut)
table(bonomeClinical[bonomeUsed, "SurgeryOutcome"],
      (bonomeExpression[fabp4.ps, bonomeUsed] > fabp4Cut) &
      (bonomeExpression[adh1b.ps, bonomeUsed] > adh1bCut))
```

```{r cleanupBonome}
rm(bonomeClinical, bonomeExpression,
   fabp4.ps, adh1b.ps, fabp4Cut, adh1bCut, bgColors, drawScatter,
   bonomeOSYrs, bonomeUsed)
```

## 5. Plotting FABP4 vs ADH1B for CCLE

```{r loadCCLE}
load(file.path("RDataObjects", "ccleExpression.RData"))
load(file.path("RDataObjects", "ccleClinical.RData"))
## Fail loudly, rather than just printing FALSE, if the clinical rows are not
## in the same order as the expression columns.
stopifnot(all(rownames(ccleClinical) == colnames(ccleExpression)))
```

```{r plotFABP4vsADH1BforCCLE}
fabp4.ps <- "203980_at"
adh1b.ps <- "209612_s_at"
## Cutoffs (assessed by eye) for the vertical and horizontal lines.
fabp4Cut <- 5
adh1bCut <- 4.5
bgColors <- rep("grey", nrow(ccleClinical))
bgColors[ccleClinical[, "primarySite"] == "ovary"] <- "pink"
bgColors[ccleClinical[, "primarySite"] == "breast"] <- "blue"
## One definition serves both the on-screen figure and the PDF copy.
drawScatter <- function(pdfFile = NULL) {
  plotFabp4VsAdh1b(ccleExpression[fabp4.ps, ],
                   ccleExpression[adh1b.ps, ],
                   bg = bgColors,
                   main = "FABP4 and ADH1B in CCLE Samples",
                   fabp4Cut = fabp4Cut, adh1bCut = adh1bCut,
                   legendPos = "topright",
                   legendLabels = c("Ovary", "Breast", "Other"),
                   legendColors = c("pink", "blue", "grey"),
                   legendTitle = "Tissue", pdfFile = pdfFile)
}
drawScatter()
```

```{r plotFABP4vsADH1BforCCLEPdf, echo=FALSE}
drawScatter(file.path("Figures", "fabp4VsAdh1bCCLE.pdf"))
```

```{r cleanupCCLE}
rm(ccleClinical, ccleExpression,
   fabp4.ps, adh1b.ps, fabp4Cut, adh1bCut, bgColors, drawScatter,
   plotFabp4VsAdh1b)
```

## Appendix

```{r getLocation}
getwd()
```

```{r sessionInfo}
sessionInfo()
```