Chapter 15 TCGA: Associate Epcam+ inflammatory with survival

Associate the signature with outcome in TCGA: Load in TCGA right now

TCGArsem=read.delim("../data/TCGA/BRCA.rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes_normalized__data.data.txt", sep="\t")
rownames(TCGArsem)=TCGArsem[ ,1]
TCGArsem=TCGArsem[, grep("01A", colnames(TCGArsem))]
colnames(TCGArsem)=substr(colnames(TCGArsem), 1, 12)
colnames(TCGArsem)=gsub("\\.", "-", colnames(TCGArsem))
TCGArsem=TCGArsem[-1, ]

TCGArsem2=apply(TCGArsem, 2, as.numeric)
#TCGArsem2=t(TCGArsem2)
#colnames(TCGArsem2)=colnames(TCGArsem)
#TCGArsem2=data.frame(TCGArsem2)
rownames(TCGArsem2)=rownames(TCGArsem)
rownames(TCGArsem2)=sapply(strsplit(rownames(TCGArsem2), "\\|"), function(x) x[1])
TCGArsem2=TCGArsem2[-which(rownames(TCGArsem2)=="?"), ]

load("../data/TCGA/BrClin_clinical_Nov2017.RData")

ax1=match(BrClin$Patient.ID, colnames(TCGArsem))

BrClin=BrClin[-which(is.na(ax1)), ]
TCGArsem=TCGArsem[ , na.omit(ax1)]

## load in the inflammation subtype information
#ThorssData=read.xlsx("../data/TCGA/Thorsson2018_table1.xlsx",1)
ThorssData=read.csv("../data/TCGA/Thorsson2018_table1.csv")

Edit the clinical data to make sure data is censored at 60 months (5 years) and that stage is given an integer value (no 2A, 2b etc.) for easier comparisons

#m1=match(colnames(TCGAssgsea), BrClin$Patient.ID)
BrClin$Stage=(substr(BrClin$American.Joint.Committee.on.Cancer.Tumor.Stage.Code, 2, 2))
BrClin$Stage[which(BrClin$Stage==""|BrClin$Stage=="X")]=NA

BrClin$Overall.Survival..Months.[which(BrClin$Overall.Survival..Months.>=60)]=60
BrClin$Overall.Survival.Status[which(BrClin$Overall.Survival..Months.>=60)]="LIVING"

BrClin$Disease.Free..Months.[which(BrClin$Disease.Free..Months.>=60)]=60
BrClin$Disease.Free.Status[which(BrClin$Disease.Free..Months.>=60)]="Disease Free"

15.1 Associating CD74 with phenotype and outcome

The Thorsson data has pre-calculated scores for:

immune subtypes
leukocyte fractins
proportion of data with coding mutations
TCR shannon index

Z-Scale CD74 scores prior to analysis:

TCGAssgsea=scale(TCGArsem2[match("CD74", rownames(TCGArsem2)), ])
m1=match(colnames(TCGArsem2), BrClin$Patient.ID)

tinfo=data.frame(pam=BrClin$PAM50[m1], OSM=BrClin$Overall.Survival..Months.[m1],
                 OSS=BrClin$Overall.Survival.Status[m1], cd74=TCGAssgsea,
                 DFS=BrClin$Disease.Free.Status[m1],
                 DFM=BrClin$Disease.Free..Months.[m1],
                 Stage=BrClin$Stage[m1])

ThorssData=ThorssData[which(ThorssData$TCGA.Study=="BRCA"), ]

n1=match(rownames(tinfo), ThorssData$TCGA.Participant.Barcode)
tinfo$immSub=ThorssData$Immune.Subtype[n1]
tinfo$leukFrac=as.numeric(ThorssData$Leukocyte.Fraction[n1])
tinfo$strFrac=as.numeric(ThorssData$Stromal.Fraction[n1])
tinfo$codingMut=as.numeric(ThorssData$Nonsilent.Mutation.Rate[n1])
tinfo$TCRshann=as.numeric(ThorssData$TCR.Shannon[n1])

par(mfrow=c(2,3))
boxplot(tinfo$cd74~tinfo$pam, ylab="gene exp", main="PAM50")
boxplot(tinfo$cd74~tinfo$immSub, ylab="gene exp", main="Immune sbtype")
ax=cor.test(tinfo$cd74, log10(tinfo$codingMut+1), method="spearman")
smoothScatter(tinfo$cd74, log10(tinfo$codingMut+1), xlab="gene exp", ylab="log10 mut")
text(4,0.5, sprintf("cor=%s, p=%s", round(ax$estimate, 2), round(ax$p.value, 3)))
ax=cor.test(tinfo$cd74, tinfo$leukFrac, method="spearman")
smoothScatter(tinfo$cd74, (tinfo$leukFrac), xlab="gene exp", ylab="leuk frac")
text(4,0.2, sprintf("cor=%s, p=%s", round(ax$estimate, 2), round(ax$p.value, 3)))
ax=cor.test(tinfo$cd74, tinfo$TCRshann, method="spearman")
smoothScatter(tinfo$cd74, tinfo$TCRshann, xlab="gene exp", ylab="TCR diversity")
text(4,1, sprintf("cor=%s, p=%s", round(ax$estimate, 2), round(ax$p.value, 3)))
hist(tinfo$cd74)

Figure 15.1: CD74 assoc with patient data


#write.csv(tinfo, file="nature-tables/5d-TCGA_data.csv")

We can also check if there is an association with survival:


axD=Surv(tinfo$DFM, ifelse(tinfo$DFS=="Recurred/Progressed", 1, ifelse(tinfo$DFS=="DiseaseFree", 0, NA)))

axO=Surv(tinfo$OSM, ifelse(tinfo$OSS=="DECEASED", 1, ifelse(tinfo$OSS=="LIVING", 0, NA)))

SurvOSS=coxph(axO~tinfo$pam+tinfo$Stage+tinfo$cd74)
SurvDFS=coxph(axD~tinfo$pam+tinfo$Stage+tinfo$cd74)

ao=summary(SurvOSS)
ao
## Call:
## coxph(formula = axO ~ tinfo$pam + tinfo$Stage + tinfo$cd74)
## 
##   n= 804, number of events= 78 
##    (276 observations deleted due to missingness)
## 
##                    coef exp(coef) se(coef)      z Pr(>|z|)    
## tinfo$pamHer2    0.3709    1.4491   0.3780  0.981   0.3265    
## tinfo$pamLumA   -0.8365    0.4332   0.3091 -2.707   0.0068 ** 
## tinfo$pamLumB   -0.3137    0.7308   0.3409 -0.920   0.3574    
## tinfo$pamNormal -0.2546    0.7752   0.6282 -0.405   0.6853    
## tinfo$Stage2     0.6162    1.8519   0.3309  1.862   0.0626 .  
## tinfo$Stage3     0.6636    1.9417   0.4315  1.538   0.1241    
## tinfo$Stage4     1.9960    7.3594   0.4255  4.691 2.72e-06 ***
## tinfo$cd74      -0.3021    0.7392   0.1530 -1.974   0.0484 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## tinfo$pamHer2      1.4491     0.6901    0.6907    3.0400
## tinfo$pamLumA      0.4332     2.3083    0.2364    0.7939
## tinfo$pamLumB      0.7308     1.3685    0.3747    1.4253
## tinfo$pamNormal    0.7752     1.2899    0.2263    2.6557
## tinfo$Stage2       1.8519     0.5400    0.9681    3.5425
## tinfo$Stage3       1.9417     0.5150    0.8335    4.5234
## tinfo$Stage4       7.3594     0.1359    3.1963   16.9448
## tinfo$cd74         0.7392     1.3528    0.5477    0.9978
## 
## Concordance= 0.715  (se = 0.031 )
## Likelihood ratio test= 42.32  on 8 df,   p=1e-06
## Wald test            = 47.41  on 8 df,   p=1e-07
## Score (logrank) test = 56.19  on 8 df,   p=3e-09
as=summary(SurvDFS)
as
## Call:
## coxph(formula = axD ~ tinfo$pam + tinfo$Stage + tinfo$cd74)
## 
##   n= 587, number of events= 69 
##    (493 observations deleted due to missingness)
## 
##                    coef exp(coef) se(coef)      z Pr(>|z|)    
## tinfo$pamHer2   -0.4626    0.6296   0.4860 -0.952  0.34117    
## tinfo$pamLumA   -1.0004    0.3677   0.3158 -3.168  0.00153 ** 
## tinfo$pamLumB   -0.8377    0.4327   0.3701 -2.263  0.02361 *  
## tinfo$pamNormal -0.8810    0.4143   0.7608 -1.158  0.24681    
## tinfo$Stage2     0.5470    1.7281   0.3389  1.614  0.10648    
## tinfo$Stage3     1.0539    2.8687   0.4440  2.373  0.01762 *  
## tinfo$Stage4     2.6058   13.5422   0.4836  5.388 7.13e-08 ***
## tinfo$cd74      -0.1134    0.8928   0.1456 -0.779  0.43600    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## tinfo$pamHer2      0.6296    1.58824   0.24287    1.6323
## tinfo$pamLumA      0.3677    2.71950   0.19802    0.6828
## tinfo$pamLumB      0.4327    2.31113   0.20948    0.8937
## tinfo$pamNormal    0.4143    2.41342   0.09329    1.8404
## tinfo$Stage2       1.7281    0.57866   0.88943    3.3577
## tinfo$Stage3       2.8687    0.34859   1.20151    6.8493
## tinfo$Stage4      13.5422    0.07384   5.24827   34.9432
## tinfo$cd74         0.8928    1.12006   0.67121    1.1876
## 
## Concordance= 0.69  (se = 0.039 )
## Likelihood ratio test= 32.15  on 8 df,   p=9e-05
## Wald test            = 39.03  on 8 df,   p=5e-06
## Score (logrank) test = 50.73  on 8 df,   p=3e-08

Note that CD74 is associated with survival: The harzards ratio is 1.851886 (95CI:0.9680967, 3.5424989), p value 1.176905110^{-6}.

This means that this is associated with survival regardless of subtype. We can look at these values in forestplots


data1=data.frame(X1=c("","PAM50", "", "", "", "", "Stage", "","","", "CD74"),
                X2=c("","Basal", "Her2", "LumA", "LumB", "Normal" ,"1", "2", "3", "4", ""),
                X3=c("NSamp",  table(tinfo$pam), table(tinfo$Stage), length(na.omit(tinfo$cd74))),
                X4=c("HR", "", round(summary(SurvOSS)$coefficients[ 1:4,2],2), "",
                     round(summary(SurvOSS)$coefficients[ 5:8,2],2)))

mdata=data.frame(summary(SurvOSS)$conf.int[ ,c(1,3:4)])
mdata=rbind(NA,NA, mdata[1:4, ], NA, mdata[5:8, ])

#pdf("figure-outputs/Figure5_CD74_forestplot.pdf", width=5, height=5)

forestplot(data1, mdata, xlog=T,
           boxsize=0.5)

Figure 15.2: forest plot CD74


#write.csv(cbind(data1, mdata), file="nature-tables/5e_forest_plot.csv")

DT::datatable(cbind(data1, mdata), rownames=F, class='cell-border stripe',
          extensions="Buttons", options=list(dom="Bfrtip", buttons=c('csv', 'excel')))

Figure 15.2: forest plot CD74

We can look below what the association with subtype is in KM curves:

par(mfrow=c(2,2))

Xind=c("Basal", "Her2", "LumA", "LumB")

for (i in Xind){
  l1=which(tinfo$pam==i)
  ax=Surv(tinfo$OSM[l1], ifelse(tinfo$OSS[l1]=="DECEASED", 1, ifelse(tinfo$OSS[l1]=="LIVING", 0, NA)))
  TCGAvalCut=cut(tinfo$cd74[l1], quantile(tinfo$cd74[l1], c(0, 0.33, 0.67, 1)), c("L","M", "P"))
  ax1=summary(coxph(ax~TCGAvalCut+tinfo$Stage[l1]))
  ax2=plot(survfit(ax~TCGAvalCut), main=paste(i, "CD74 gene exp"), col=brewer.pal(3, "Blues"),lwd=2, xlab="Time (months)", ylab="OS", mark.time=T)
  text(20, 0.2, sprintf("HR:%s (%s-%s) p=%s", round(ax1$coefficients[2,2 ],2), 
                     round(ax1$conf.int[2,3 ],2), round(ax1$conf.int[2, 4],2),round(ax1$logtest[3],2)))
}

Figure 15.3: OS CD74 by subtype


par(mfrow=c(2,2))

for (i in Xind){
   l1=which(tinfo$pam==i)
  ax=Surv(tinfo$DFM[l1], ifelse(tinfo$DFS[l1]=="Recurred/Progressed", 1, ifelse(tinfo$DFS[l1]=="DiseaseFree", 0, NA)))
  TCGAvalCut=cut(tinfo$cd74[l1], quantile(tinfo$cd74[l1], c(0, 0.33, 0.67, 1)), c("L","M", "P"))
  ax1=summary(coxph(ax~TCGAvalCut+tinfo$Stage[l1]))
  ax2=plot(survfit(ax~TCGAvalCut), main=paste(i, "CD74 gene exp"), col=brewer.pal(3, "Blues"),lwd=2, xlab="Time (months)", ylab="DFS", mark.time=T)
  text(20, 0.2, sprintf("HR:%s (%s-%s) p=%s", round(ax1$coefficients[2,2 ],2), 
                     round(ax1$conf.int[2,3 ],2), round(ax1$conf.int[2, 4],2),round(ax1$logtest[3],2)))
}

Figure 15.4: DFS CD74 by subtype

15.2 Associating ADMATS10 with phenotype and outcome

Do the same thing as above with the ADAMST10 scores

15.2.1 All samples

Z-Scale ADAMTS10 scores prior to analysis:

TCGAssgsea=scale(TCGArsem2[match("ADAMTS10", rownames(TCGArsem2)), ])
tinfo$adamts10=TCGAssgsea

#pdf("figure-outputs/Figure5_ADAMST10_output.pdf", height=7, width=9)

par(mfrow=c(2,3))
boxplot(tinfo$adamts10~tinfo$pam, ylab="gene exp", main="PAM50")
boxplot(tinfo$adamts10~tinfo$immSub, ylab="gene exp", main="Immune sbtype")
ax=cor.test(tinfo$adamts10, log10(tinfo$codingMut+1), method="spearman")
smoothScatter(tinfo$adamts10, log10(tinfo$codingMut+1), xlab="gene exp", ylab="log10 mut")
text(4,0.5, sprintf("cor=%s, p=%s", round(ax$estimate, 2), round(ax$p.value, 3)))
ax=cor.test(tinfo$adamts10, tinfo$leukFrac, method="spearman")
smoothScatter(tinfo$adamts10, (tinfo$leukFrac), xlab="gene exp", ylab="leuk frac")
text(4,0.2, sprintf("cor=%s, p=%s", round(ax$estimate, 2), round(ax$p.value, 3)))
ax=cor.test(tinfo$adamts10, tinfo$TCRshann, method="spearman")
smoothScatter(tinfo$adamts10, tinfo$TCRshann, xlab="gene exp", ylab="TCR diversity")
text(4,1, sprintf("cor=%s, p=%s", round(ax$estimate, 2), round(ax$p.value, 3)))
#hist(tinfo$adamts10)
ax=cor.test(tinfo$adamts10, tinfo$strFrac, method="spearman")
smoothScatter(tinfo$adamts10, (tinfo$strFrac), xlab="gene exp", ylab="str frac")
text(4,0.2, sprintf("cor=%s, p=%s", round(ax$estimate, 2), round(ax$p.value, 3)))

Figure 15.5: ADAMTS10 assoc with patient data


DT::datatable(tinfo, rownames=F, class='cell-border stripe',
          extensions="Buttons", options=list(dom="Bfrtip", buttons=c('csv', 'excel')))

Figure 15.5: ADAMTS10 assoc with patient data

We can also check if there is an association with survival:


axD=Surv(tinfo$DFM, ifelse(tinfo$DFS=="Recurred/Progressed", 1, ifelse(tinfo$DFS=="DiseaseFree", 0, NA)))

axO=Surv(tinfo$OSM, ifelse(tinfo$OSS=="DECEASED", 1, ifelse(tinfo$OSS=="LIVING", 0, NA)))

SurvOSS=coxph(axO~tinfo$pam+tinfo$Stage+tinfo$adamts10)
SurvDFS=coxph(axD~tinfo$pam+tinfo$Stage+tinfo$adamts10)

ao=summary(SurvOSS)
ao
## Call:
## coxph(formula = axO ~ tinfo$pam + tinfo$Stage + tinfo$adamts10)
## 
##   n= 804, number of events= 78 
##    (276 observations deleted due to missingness)
## 
##                     coef exp(coef) se(coef)      z Pr(>|z|)    
## tinfo$pamHer2    0.54256   1.72041  0.38290  1.417  0.15649    
## tinfo$pamLumA   -0.76154   0.46695  0.30686 -2.482  0.01308 *  
## tinfo$pamLumB    0.01904   1.01922  0.34481  0.055  0.95596    
## tinfo$pamNormal -0.45045   0.63734  0.62772 -0.718  0.47301    
## tinfo$Stage2     0.78659   2.19589  0.32970  2.386  0.01704 *  
## tinfo$Stage3     0.65338   1.92203  0.43174  1.513  0.13018    
## tinfo$Stage4     2.22438   9.24771  0.42640  5.217 1.82e-07 ***
## tinfo$adamts10   0.33950   1.40424  0.11926  2.847  0.00442 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## tinfo$pamHer2      1.7204     0.5813    0.8123    3.6438
## tinfo$pamLumA      0.4669     2.1416    0.2559    0.8521
## tinfo$pamLumB      1.0192     0.9811    0.5185    2.0034
## tinfo$pamNormal    0.6373     1.5690    0.1862    2.1812
## tinfo$Stage2       2.1959     0.4554    1.1507    4.1903
## tinfo$Stage3       1.9220     0.5203    0.8246    4.4798
## tinfo$Stage4       9.2477     0.1081    4.0094   21.3299
## tinfo$adamts10     1.4042     0.7121    1.1116    1.7740
## 
## Concordance= 0.705  (se = 0.033 )
## Likelihood ratio test= 44.68  on 8 df,   p=4e-07
## Wald test            = 51.25  on 8 df,   p=2e-08
## Score (logrank) test = 59.87  on 8 df,   p=5e-10
as=summary(SurvDFS)
as
## Call:
## coxph(formula = axD ~ tinfo$pam + tinfo$Stage + tinfo$adamts10)
## 
##   n= 587, number of events= 69 
##    (493 observations deleted due to missingness)
## 
##                    coef exp(coef) se(coef)      z Pr(>|z|)    
## tinfo$pamHer2   -0.3174    0.7280   0.4917 -0.646  0.51857    
## tinfo$pamLumA   -0.9437    0.3892   0.3139 -3.006  0.00264 ** 
## tinfo$pamLumB   -0.6459    0.5242   0.3782 -1.708  0.08767 .  
## tinfo$pamNormal -1.0680    0.3437   0.7656 -1.395  0.16301    
## tinfo$Stage2     0.5881    1.8006   0.3392  1.734  0.08293 .  
## tinfo$Stage3     1.0235    2.7830   0.4459  2.295  0.02171 *  
## tinfo$Stage4     2.6126   13.6344   0.4816  5.425 5.81e-08 ***
## tinfo$adamts10   0.2219    1.2485   0.1273  1.743  0.08125 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## tinfo$pamHer2      0.7280    1.37355   0.27775     1.908
## tinfo$pamLumA      0.3892    2.56954   0.21035     0.720
## tinfo$pamLumB      0.5242    1.90773   0.24978     1.100
## tinfo$pamNormal    0.3437    2.90957   0.07665     1.541
## tinfo$Stage2       1.8006    0.55537   0.92619     3.501
## tinfo$Stage3       2.7830    0.35933   1.16137     6.669
## tinfo$Stage4      13.6344    0.07334   5.30498    35.042
## tinfo$adamts10     1.2485    0.80099   0.97282     1.602
## 
## Concordance= 0.671  (se = 0.041 )
## Likelihood ratio test= 34.27  on 8 df,   p=4e-05
## Wald test            = 41.62  on 8 df,   p=2e-06
## Score (logrank) test = 53.32  on 8 df,   p=9e-09

Note that ADAMST10 is associated with survival: The harzards ratio is 2.1958872 (95CI:1.150724, 4.1903363), p value 4.22777410^{-7}.

This means that this is associated with survival regardless of subtype. We can look at these values in forestplots


data1=data.frame(X1=c("","PAM50", "", "", "", "", "Stage", "","","", "ADAMST10"),
                X2=c("","Basal", "Her2", "LumA", "LumB", "Normal" ,"1", "2", "3", "4", ""),
                X3=c("NSamp",  table(tinfo$pam), table(tinfo$Stage), length(na.omit(tinfo$cd74))),
                X4=c("HR", "", round(summary(SurvOSS)$coefficients[ 1:4,2],2), "",
                     round(summary(SurvOSS)$coefficients[ 5:8,2],2)))

mdata=data.frame(summary(SurvOSS)$conf.int[ ,c(1,3:4)])
mdata=rbind(NA,NA, mdata[1:4, ], NA, mdata[5:8, ])

#pdf("figure-outputs/Figure5_CD74_forestplot.pdf", width=5, height=5)

forestplot(data1, mdata, xlog=T,
           boxsize=0.5)

Figure 15.6: forest plot ADAMTS10


#dev.off()
#write.csv(cbind(data1, mdata), file="nature-tables/5e_forest_plot_adamts10.csv")

DT::datatable(cbind(data1, mdata), rownames=F, class='cell-border stripe',
          extensions="Buttons", options=list(dom="Bfrtip", buttons=c('csv', 'excel')))

Figure 15.6: forest plot ADAMTS10

We can look below what the association with subtype is in KM curves:

par(mfrow=c(2,2))

Xind=c("Basal", "Her2", "LumA", "LumB")

for (i in Xind){
  l1=which(tinfo$pam==i)
  ax=Surv(tinfo$OSM[l1], ifelse(tinfo$OSS[l1]=="DECEASED", 1, ifelse(tinfo$OSS[l1]=="LIVING", 0, NA)))
  TCGAvalCut=cut(tinfo$adamts10[l1], quantile(tinfo$adamts10[l1], c(0, 0.33, 0.67, 1)), c("L","M", "P"))
  ax1=summary(coxph(ax~TCGAvalCut+tinfo$Stage[l1]))
  ax2=plot(survfit(ax~TCGAvalCut), main=paste(i, "ADAMTS10 gene exp"), col=brewer.pal(3, "Blues"),lwd=2, xlab="Time (months)", ylab="OS", mark.time=T)
  text(20, 0.2, sprintf("HR:%s (%s-%s) p=%s", round(ax1$coefficients[2,2 ],2), 
                     round(ax1$conf.int[2,3 ],2), round(ax1$conf.int[2, 4],2),round(ax1$logtest[3],2)))
}

Figure 15.7: OS CD74 by subtype


par(mfrow=c(2,2))

for (i in Xind){
   l1=which(tinfo$pam==i)
  ax=Surv(tinfo$DFM[l1], ifelse(tinfo$DFS[l1]=="Recurred/Progressed", 1, ifelse(tinfo$DFS[l1]=="DiseaseFree", 0, NA)))
  TCGAvalCut=cut(tinfo$adamts10[l1], quantile(tinfo$adamts10[l1], c(0, 0.33, 0.67, 1)), c("L","M", "P"))
  ax1=summary(coxph(ax~TCGAvalCut+tinfo$Stage[l1]))
  ax2=plot(survfit(ax~TCGAvalCut), main=paste(i, "ADAMTS10 gene exp"), col=brewer.pal(3, "Blues"),lwd=2, xlab="Time (months)", ylab="DFS", mark.time=T)
  text(20, 0.2, sprintf("HR:%s (%s-%s) p=%s", round(ax1$coefficients[2,2 ],2), 
                     round(ax1$conf.int[2,3 ],2), round(ax1$conf.int[2, 4],2),round(ax1$logtest[3],2)))
}

Figure 15.8: DFS ADAMTS10 by subtype

15.2.2 Redone using only ER cases

Redo survival and limit only to ER+ cases:

## make a change here
tinfoER=tinfo[which(tinfo$pam%in%c("LumA", "LumB")),  ]
tinfoER$pam=factor(tinfoER$pam)

axD=Surv(tinfoER$DFM, ifelse(tinfoER$DFS=="Recurred/Progressed", 1, ifelse(tinfoER$DFS=="DiseaseFree", 0, NA)))

axO=Surv(tinfoER$OSM, ifelse(tinfoER$OSS=="DECEASED", 1, ifelse(tinfoER$OSS=="LIVING", 0, NA)))

SurvOSS=coxph(axO~tinfoER$pam+tinfoER$Stage+tinfoER$adamts10)
SurvDFS=coxph(axD~tinfoER$pam+tinfoER$Stage+tinfoER$adamts10)

ao=summary(SurvOSS)
ao
## Call:
## coxph(formula = axO ~ tinfoER$pam + tinfoER$Stage + tinfoER$adamts10)
## 
##   n= 580, number of events= 45 
##    (2 observations deleted due to missingness)
## 
##                     coef exp(coef) se(coef)     z Pr(>|z|)    
## tinfoER$pamLumB   0.7352    2.0859   0.3289 2.235  0.02538 *  
## tinfoER$Stage2    1.2321    3.4283   0.4604 2.676  0.00745 ** 
## tinfoER$Stage3    0.4820    1.6193   0.6456 0.747  0.45530    
## tinfoER$Stage4    2.6492   14.1425   0.5702 4.646 3.39e-06 ***
## tinfoER$adamts10  0.3179    1.3742   0.1796 1.770  0.07672 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## tinfoER$pamLumB      2.086    0.47942    1.0948     3.974
## tinfoER$Stage2       3.428    0.29169    1.3905     8.452
## tinfoER$Stage3       1.619    0.61754    0.4569     5.739
## tinfoER$Stage4      14.143    0.07071    4.6253    43.243
## tinfoER$adamts10     1.374    0.72769    0.9665     1.954
## 
## Concordance= 0.682  (se = 0.044 )
## Likelihood ratio test= 27.23  on 5 df,   p=5e-05
## Wald test            = 28.36  on 5 df,   p=3e-05
## Score (logrank) test = 35.76  on 5 df,   p=1e-06
as=summary(SurvDFS)
as
## Call:
## coxph(formula = axD ~ tinfoER$pam + tinfoER$Stage + tinfoER$adamts10)
## 
##   n= 431, number of events= 44 
##    (151 observations deleted due to missingness)
## 
##                    coef exp(coef) se(coef)     z Pr(>|z|)   
## tinfoER$pamLumB  0.3624    1.4367   0.3493 1.038  0.29949   
## tinfoER$Stage2   0.6869    1.9875   0.4159 1.651  0.09866 . 
## tinfoER$Stage3   1.1168    3.0550   0.5321 2.099  0.03583 * 
## tinfoER$Stage4   1.8789    6.5462   0.6964 2.698  0.00698 **
## tinfoER$adamts10 0.3702    1.4480   0.1702 2.175  0.02966 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## tinfoER$pamLumB      1.437     0.6960    0.7246     2.849
## tinfoER$Stage2       1.987     0.5032    0.8795     4.491
## tinfoER$Stage3       3.055     0.3273    1.0767     8.668
## tinfoER$Stage4       6.546     0.1528    1.6718    25.633
## tinfoER$adamts10     1.448     0.6906    1.0372     2.022
## 
## Concordance= 0.588  (se = 0.052 )
## Likelihood ratio test= 12.76  on 5 df,   p=0.03
## Wald test            = 13.76  on 5 df,   p=0.02
## Score (logrank) test = 14.92  on 5 df,   p=0.01

#pdf("figure-outputs/NEwpanelB.pdf", height=7, width=9)

par(mfrow=c(2,3))

boxplot(tinfoER$adamts10~tinfoER$pam, ylab="gene exp", main="PAM50")
boxplot(tinfoER$adamts10~tinfoER$immSub, ylab="gene exp", main="Immune sbtype")
ax=cor.test(tinfoER$adamts10, log10(tinfoER$codingMut+1), method="spearman")
smoothScatter(tinfoER$adamts10, log10(tinfoER$codingMut+1), xlab="gene exp", ylab="log10 mut")
text(4,0.5, sprintf("cor=%s, p=%s", round(ax$estimate, 2), round(ax$p.value, 3)))
ax=cor.test(tinfoER$adamts10, tinfoER$leukFrac, method="spearman")
smoothScatter(tinfoER$adamts10, (tinfoER$leukFrac), xlab="gene exp", ylab="leuk frac")
text(4,0.2, sprintf("cor=%s, p=%s", round(ax$estimate, 2), round(ax$p.value, 3)))
ax=cor.test(tinfoER$adamts10, tinfoER$TCRshann, method="spearman")
smoothScatter(tinfoER$adamts10, tinfoER$TCRshann, xlab="gene exp", ylab="TCR diversity")
text(4,1, sprintf("cor=%s, p=%s", round(ax$estimate, 2), round(ax$p.value, 3)))
ax=cor.test(tinfoER$adamts10, tinfoER$strFrac, method="spearman")
smoothScatter(tinfoER$adamts10, (tinfoER$strFrac), xlab="gene exp", ylab="str frac")
text(4,0.2, sprintf("cor=%s, p=%s", round(ax$estimate, 2), round(ax$p.value, 3)))


DT::datatable(tinfoER, rownames=F, class='cell-border stripe',
          extensions="Buttons", options=list(dom="Bfrtip", buttons=c('csv', 'excel')))

Also check the forest plots


data1=data.frame(X1=c("","PAM50", "",  "Stage", "","","", "ADAMST10"),
                X2=c("","LumA", "LumB" ,"1", "2", "3", "4", ""),
                X3=c("NSamp",  table(tinfoER$pam), table(tinfoER$Stage), length(na.omit(tinfoER$adamts10))),
                X4=c("HR", "", round(summary(SurvOSS)$coefficients[ 1,2],2), "",
                     round(summary(SurvOSS)$coefficients[ 2:5,2],2)))

mdata=data.frame(summary(SurvOSS)$conf.int[ ,c(1,3:4)])
mdata=rbind(NA,NA, mdata[1, ], NA, mdata[2:5, ])

#pdf("figure-outputs/Figure5_CD74_forestplot.pdf", width=5, height=5)

forestplot(data1, mdata, xlog=T,
           boxsize=0.5)

Figure 15.9: Adamts10 expression survival analysis, ER only


DT::datatable(cbind(data1, mdata), rownames=F, class='cell-border stripe',
          extensions="Buttons", options=list(dom="Bfrtip", buttons=c('csv', 'excel')))

Figure 15.9: Adamts10 expression survival analysis, ER only

15.3 Signature: Lum cases non-inflammatory: growing vs stable

Here, use the signature which was applied before to see the difference between the non-inflammatory growing vs stable samples. We compare this to the oncotypedx list of genes and mammaprint

#GlistGrowing=rownames(vstLumRes)[which(vstLumRes$log2FoldChange>1.5 & vstLumRes$baseMean>100 & vstLumRes$padj<0.05 )]
#Glistnon=rownames(vstLumRes)[which(vstLumRes$log2FoldChange<(-1.5) & vstLumRes$baseMean>100 & vstLumRes$padj<0.05 )]

HumGeneList=SymHum2Rat$HGNC.symbol[match(GlistGrowing, SymHum2Rat$RGD.symbol)]
HumGeneList2=Rat2Hum$HGNC.symbol[match(GlistGrowing, Rat2Hum$RGD.symbol)]

TCGAssgsea=gsva((TCGArsem2), list(grow=na.omit(HumGeneList2), 
                                                stab=Rat2Hum$HGNC.symbol[match(Glistnon, Rat2Hum$RGD.symbol)], Onco=Oncoprint[[1]], Mamma=Oncoprint[[2]]), method="ssgsea", ssgsea.norm=F)

TCGAssgsea=t(scale(t(TCGAssgsea)))

par(mfrow=c(1,2))
hist(TCGAssgsea[1, ], main="stable")
hist(TCGAssgsea[2, ], main="growing")

plot(TCGAssgsea[1, ], TCGAssgsea[2, ], xlab="stable", ylab="growing")

## also compare with onco or mamma print

plot(TCGAssgsea[1, ], TCGAssgsea[3, ], xlab="stable", ylab="oncotype")

plot(TCGAssgsea[1, ], TCGAssgsea[4, ], xlab="stable", ylab="mammaprint")


write.csv("nature-tables/Supplemental_table5.csv")

The Growing signature genes (positive coefficient) are Setd7, Hspa1l, Hbb, Kifc2, Kmt5c, Pik3c2b, Lmntd2, Nsmf, Nrbp2, LOC100134871, Hsp90aa1, Coro6, Chst8, Slc4a3, Fhod1, Rpl8, Spp1, Catsperg, Col5a1, LOC100911498, Noxa1, F8, Rps19, Rps6, Irs1, Leng8, Plekhh1, Lmbr1l, Ikbke, Pnisr, Adamts10, Zfp692, Trim41, Sema6d, Mgp, Cdc42bpg, Slc16a13, Dmpk and stable (negative coefficient) are Fgfr2, Mx1, Endou, Grhl3, Slpi, Gbp2, Plg, Aldh1a3, Upk3a.

Below are the survival curves for the growing signature:

#pdf("figure-outputs/Fig5_survival_analysis__poslist_Lum_samples_baseMean_greater100.pdf", height=8, width = 8)

par(mfrow=c(2,2))

Xind=c("Basal", "Her2", "LumA", "LumB")

for (i in Xind){
  l1=which(BrClin$PAM50==i)
  lx2=match(BrClin$Patient.ID[l1], colnames(TCGArsem2))
  ax=Surv(BrClin$Overall.Survival..Months.[l1], ifelse(BrClin$Overall.Survival.Status[l1]=="DECEASED", 1, ifelse(BrClin$Overall.Survival.Status[l1]=="LIVING", 0, NA)))
   axb=coxph(ax~TCGAssgsea[1, lx2]+BrClin$Stage[l1])
  axc=summary(axb)
  TCGAvalCut=cut(TCGAssgsea[1, lx2], quantile(TCGAssgsea[1, lx2], c(0, 0.33, 0.67, 1)), c("L","M", "P"))
  ax2=plot(survfit(ax~TCGAvalCut), main=paste(i, "lum growing"), col=brewer.pal(3, "Blues"),lwd=2, xlab="Time (months)", ylab="OS", mark.time=T)
   text(10, 0, sprintf("univ. cts. var. HR=%s (%s-%s), p=%s", round(axc$coefficients[1,2], 2),
                      round(axc$conf.int[1,3],2), round(axc$conf.int[1,4],2),round(axc$logtest[3],2 )))
   legend("topleft",paste(c("L", "M", "H"), summary(TCGAvalCut)), col=brewer.pal(3, "Blues"), lwd=1,
          pch=19)
  
}

Figure 15.10: OS: growing signature



for (i in Xind){
  l1=which(BrClin$PAM50==i)
  lx2=match(BrClin$Patient.ID[l1], colnames(TCGArsem2))
  ax=Surv(BrClin$Disease.Free..Months.[l1], ifelse(BrClin$Disease.Free.Status[l1]=="Recurred/Progressed", 1, ifelse(BrClin$Disease.Free.Status[l1]=="DiseaseFree", 0, NA)))
  axb=coxph(ax~TCGAssgsea[1, lx2]+BrClin$Stage[l1])
  axc=summary(axb)
  
  TCGAvalCut=cut(TCGAssgsea[1, lx2], quantile(TCGAssgsea[1, lx2], c(0, 0.33, 0.67, 1)), c("L","M", "P"))
  ax2=plot(survfit(ax~TCGAvalCut), main=paste(i, "lum growing"), col=brewer.pal(3, "Blues"),lwd=2, xlab="Time (months)", ylab="DFS", mark.time=T)
  text(10, 0, sprintf("univ. cts. var. HR=%s (%s-%s), p=%s", round(axc$coefficients[1,2], 2),
                      round(axc$conf.int[1,3],2), round(axc$conf.int[1,4],2),round(axc$logtest[3],2 )))
 legend("topleft",paste(c("L", "M", "H"), summary(TCGAvalCut)), col=brewer.pal(3, "Blues"), lwd=1,
          pch=19)
}

Figure 15.11: OS: growing signature


#dev.off()

#DT::datatable(as.data.frame(test), rownames=F, class='cell-border stripe',
#         extensions="Buttons", options=list(dom="Bfrtip", buttons=c('csv', 'excel')))

The OS curves for the stable signature


#pdf("figure-outputs/Fig5_survival_analysis__neglist_Lum_samples_baseMean_greater100.pdf", height=8, width = 8)

par(mfrow=c(2,2))

Xind=c("Basal", "Her2", "LumA", "LumB")

for (i in Xind){
  l1=which(BrClin$PAM50==i)
   lx2=match(BrClin$Patient.ID[l1], colnames(TCGArsem2))
  ax=Surv(BrClin$Overall.Survival..Months.[l1], ifelse(BrClin$Overall.Survival.Status[l1]=="DECEASED", 1, ifelse(BrClin$Overall.Survival.Status[l1]=="LIVING", 0, NA)))
   axb=coxph(ax~TCGAssgsea[2, lx2]+BrClin$Stage[l1])
  axc=summary(axb)
  
  TCGAvalCut=cut(TCGAssgsea[2, lx2], quantile(TCGAssgsea[2, lx2], c(0, 0.33, 0.67, 1)), c("L","M", "P"))
  ax2=plot(survfit(ax~TCGAvalCut), main=paste(i, "lum stab"), col=brewer.pal(3, "Blues"),lwd=2, xlab="Time", ylab="OS", mark.time=T)
   text(10, 0, sprintf("univ. cts. var. HR=%s (%s-%s), p=%s", round(axc$coefficients[1,2], 2),
                      round(axc$conf.int[1,3],2), round(axc$conf.int[1,4],2),round(axc$logtest[3],2 )))
 #  legend("topleft",paste(c("L", "M", "H"), summary(TCGAvalCut)), col=brewer.pal(3, "Blues"), lwd=1,
#          pch=19)
  
}

Figure 15.12: OS: stable signature


#dev.off()

ax=cut(TCGAssgsea[2, lx2], quantile(TCGAssgsea[2, lx2], c(0, 0.33, 0.67, 1)), c("L","M", "P"))
table(ax)
## ax
##  L  M  P 
## 58 60 59

par(mfrow=c(2,2))

for (i in Xind){
  l1=which(BrClin$PAM50==i)
   lx2=match(BrClin$Patient.ID[l1], colnames(TCGArsem2))
  ax=Surv(BrClin$Disease.Free..Months.[l1], ifelse(BrClin$Disease.Free.Status[l1]=="Recurred/Progressed", 1, ifelse(BrClin$Disease.Free.Status[l1]=="DiseaseFree", 0, NA)))
   axb=coxph(ax~TCGAssgsea[2, lx2]+BrClin$Stage[l1])
   
  axc=summary(axb)
  axc
  TCGAvalCut=cut(TCGAssgsea[2, lx2], quantile(TCGAssgsea[2, lx2], c(0, 0.33, 0.67, 1)), c("L","M", "P"))
  ax2=plot(survfit(ax~TCGAvalCut), main=paste(i, "lum stable"), col=brewer.pal(3, "Blues"),lwd=2, xlab="Time (months)", ylab="DFS", mark.time=T)
    text(10, 0, sprintf("univ. cts. var. HR=%s (%s-%s), p=%s", round(axc$coefficients[1,2], 2),
                      round(axc$conf.int[1,3],2), round(axc$conf.int[1,4],2),round(axc$logtest[3],2 )))
#    legend("topleft",paste(c("L", "M", "H"), summary(TCGAvalCut)), col=brewer.pal(3, "Blues"), lwd=1,
#          pch=19)
  
}

Figure 15.13: OS: stable signature

Quick sanity checks: for LuminalA samples, check that the scores match with outcome

## coxph for growing signature:
 l1=which(BrClin$PAM50=="LumA")
   lx2=match(BrClin$Patient.ID[l1], colnames(TCGAssgsea))
  ax=Surv(BrClin$Disease.Free..Months.[l1], ifelse(BrClin$Disease.Free.Status[l1]=="Recurred/Progressed", 1, ifelse(BrClin$Disease.Free.Status[l1]=="DiseaseFree", 0, NA)))
   axb=coxph(ax~TCGAssgsea[1, lx2]+BrClin$Stage[l1])
   summary(axb)
## Call:
## coxph(formula = ax ~ TCGAssgsea[1, lx2] + BrClin$Stage[l1])
## 
##   n= 297, number of events= 30 
##    (122 observations deleted due to missingness)
## 
##                      coef exp(coef) se(coef)     z Pr(>|z|)   
## TCGAssgsea[1, lx2] 0.5325    1.7032   0.2003 2.658  0.00786 **
## BrClin$Stage[l1]2  0.1482    1.1598   0.4742 0.313  0.75465   
## BrClin$Stage[l1]3  0.7633    2.1455   0.5768 1.323  0.18570   
## BrClin$Stage[l1]4  1.1378    3.1200   0.8184 1.390  0.16445   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                    exp(coef) exp(-coef) lower .95 upper .95
## TCGAssgsea[1, lx2]     1.703     0.5871    1.1501     2.522
## BrClin$Stage[l1]2      1.160     0.8622    0.4578     2.938
## BrClin$Stage[l1]3      2.145     0.4661    0.6927     6.645
## BrClin$Stage[l1]4      3.120     0.3205    0.6273    15.517
## 
## Concordance= 0.678  (se = 0.045 )
## Likelihood ratio test= 11.87  on 4 df,   p=0.02
## Wald test            = 12.45  on 4 df,   p=0.01
## Score (logrank) test = 13.21  on 4 df,   p=0.01

## boxplot of growing signature with outcome:
par(mfrow=c(1,2))   
boxplot(TCGAssgsea[1, lx2]~BrClin$Disease.Free.Status[l1], main="growing sig with status")   
boxplot(TCGAssgsea[1, lx2]~BrClin$Overall.Survival.Status[l1], main="growing sig with OS")

Combined signature: subtract the stable signature from the growing signature and check the outcome here:

#pdf("figure-outputs/Fig5_survival_analysis_growing_subtract_stab_Lum_samples_baseMean_greater100.pdf", height=8, width = 8)

par(mfrow=c(2,2))

for (i in Xind){
  l1=which(BrClin$PAM50==i)
   lx2=match(BrClin$Patient.ID[l1], colnames(TCGArsem2))
  ax=Surv(BrClin$Disease.Free..Months.[l1], ifelse(BrClin$Disease.Free.Status[l1]=="Recurred/Progressed", 1, ifelse(BrClin$Disease.Free.Status[l1]=="DiseaseFree", 0, NA)))
  tval=TCGAssgsea[1, lx2]-TCGAssgsea[2, lx2]
   axb=coxph(ax~tval+BrClin$Stage[l1])
  axc=summary(axb)
  TCGAvalCut=cut(tval, quantile(tval, c(0, 0.33, 0.67, 1)), c("L","M", "P"))
  ax2=plot(survfit(ax~TCGAvalCut), main=paste(i, "lum growing"), col=brewer.pal(3, "Blues"),lwd=2, xlab="Time (months)", ylab="DFS", mark.time=T)
    text(10, 0, sprintf("HR=%s (%s-%s), p=%s", round(axc$coefficients[1,2], 2),
                      round(axc$conf.int[1,3],2), round(axc$conf.int[1,4],2),round(axc$logtest[3],2 )))
    legend("topleft",paste(c("L", "M", "H"), summary(TCGAvalCut)), col=brewer.pal(3, "Blues"), lwd=1,
          pch=19)
    
    ay=Surv(BrClin$Overall.Survival..Months.[l1], ifelse(BrClin$Overall.Survival.Status[l1]=="DECEASED", 1, ifelse(BrClin$Overall.Survival.Status[l1]=="LIVING", 0, NA)))
   ayb=coxph(ay~tval+BrClin$Stage[l1])  
   ayc=summary(ayb)
    ax2=plot(survfit(ay~TCGAvalCut), main=paste(i, "lum growing"), col=brewer.pal(3, "Blues"),lwd=2, xlab="Time", ylab="OS", mark.time=T)
   text(10, 0, sprintf("HR=%s (%s-%s), p=%s", round(ayc$coefficients[1,2], 2),
                      round(ayc$conf.int[1,3],2), round(ayc$conf.int[1,4],2),round(ayc$logtest[3],2 )))
   legend("topleft",paste(c("L", "M", "H"), summary(TCGAvalCut)), col=brewer.pal(3, "Blues"), lwd=1,
          pch=19)  
  
}


#dev.off()

We can see whether this signature associates with leukocyte content and TCR diversity in the TCGA cohort:


l1=which(BrClin$PAM50=="LumA")
lx2=match(BrClin$Patient.ID[l1], colnames(TCGAssgsea))

lx3=match(BrClin$Patient.ID[l1], ThorssData$TCGA.Participant.Barcode)


newinfoTab=data.frame(pat=BrClin$Patient.ID[l1], TCGAgrow=TCGAssgsea[1, lx2],
                      TCGAstab=TCGAssgsea[2, lx2],
                      TCGAcomb=TCGAssgsea[1, lx2]-TCGAssgsea[2, lx2],
                      Onco=TCGAssgsea[3, lx2],
                      Mamma=TCGAssgsea[4, lx2],
                      OSM=BrClin$Overall.Survival..Months.[l1],
                      OSS=BrClin$Overall.Survival.Status[l1], DFS=BrClin$Disease.Free.Status[l1],
                 DFM=BrClin$Disease.Free..Months.[l1],
                 Stage=BrClin$Stage[l1],
                      leuk=as.numeric(ThorssData$Leukocyte.Fraction[lx3]),
                      str=as.numeric(ThorssData$Stromal.Fraction[lx3]),
                      TCRshann=as.numeric(ThorssData$TCR.Shannon[lx3]),
                      immSub=ThorssData$Immune.Subtype[lx3])


#pdf("figure-outputs/NewpanelC.pdf", height=8, width=9)
par(mfrow=c(2,2))
ax=cor.test(newinfoTab$TCGAgrow, newinfoTab$leuk, method="spearman")
smoothScatter(newinfoTab$TCGAgrow, newinfoTab$leuk,xlab="Luminal signature",
            ylab="Leukoycte")
text(0.6,0.6, sprintf("cor=%s, p=%s", round(ax$estimate, 2), round(ax$p.value, 2)))
ax=cor.test(newinfoTab$TCGAgrow, newinfoTab$str, method="spearman")
smoothScatter(newinfoTab$TCGAgrow, newinfoTab$str, xlab="Luminal signature",
            ylab="Stroma")
text(0.6,0.6, sprintf("cor=%s, p=%s", round(ax$estimate, 2), round(ax$p.value, 2)))
ax=cor.test(newinfoTab$TCGAgrow, newinfoTab$TCRshann, method="spearman")
smoothScatter(newinfoTab$TCGAgrow, newinfoTab$TCRshann,xlab="Luminal signature", ylab="TCR div")
text(0.6,5, sprintf("cor=%s, p=%s", round(ax$estimate, 2), round(ax$p.value, 9)))
boxplot(newinfoTab$TCGAgrow~newinfoTab$immSub)

Figure 15.14: luminal signature associated with clinical variables

#dev.off()

DT::datatable(newinfoTab, rownames=T, class='cell-border stripe',
          extensions="Buttons", options=list(dom="Bfrtip", buttons=c('csv', 'excel'), Xscroll=T))

Figure 15.14: luminal signature associated with clinical variables

As well as whether it associates with any of the previously identified immune subtypes:

ggplot(newinfoTab, aes(x=immSub, y=TCGAstab))+geom_boxplot()+geom_point()+theme_bw()

write.csv(newinfoTab, file="nature-tables/5h_lum_growing_vs_stable.csv")

15.4 Comparison to oncotype and mammaprint

Finally, see how these compare to oncotype or mammaprint.

We can do this two ways:

15.4.1 Overall survival

Run survival ~ Oncotype/Mamma/Growing + PAM50 + Stage

SurvOS=Surv(newinfoTab$OSM, ifelse(newinfoTab$OSS=="LIVING", 0, 1))
SurvDFS=Surv(newinfoTab$DFM, ifelse(newinfoTab$DFS=="DiseaseFree", 0,ifelse(newinfoTab$DFS=="Recurred/Progressed", 1, NA) ))

model1=coxph(SurvOS~TCGAgrow+Stage, data=newinfoTab)
summary(model1)
## Call:
## coxph(formula = SurvOS ~ TCGAgrow + Stage, data = newinfoTab)
## 
##   n= 417, number of events= 27 
##    (2 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)   
## TCGAgrow  0.2753    1.3169   0.2012  1.368  0.17131   
## Stage2    0.7334    2.0821   0.4799  1.528  0.12644   
## Stage3   -0.9157    0.4003   1.0805 -0.847  0.39677   
## Stage4    2.0093    7.4585   0.6467  3.107  0.00189 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAgrow    1.3169     0.7594   0.88770     1.954
## Stage2      2.0821     0.4803   0.81290     5.333
## Stage3      0.4003     2.4984   0.04815     3.327
## Stage4      7.4585     0.1341   2.09976    26.493
## 
## Concordance= 0.667  (se = 0.058 )
## Likelihood ratio test= 13.72  on 4 df,   p=0.008
## Wald test            = 14.54  on 4 df,   p=0.006
## Score (logrank) test = 18.36  on 4 df,   p=0.001
model2=coxph(SurvOS~TCGAgrow+Onco+Stage, data=newinfoTab)
summary(model2)
## Call:
## coxph(formula = SurvOS ~ TCGAgrow + Onco + Stage, data = newinfoTab)
## 
##   n= 417, number of events= 27 
##    (2 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)   
## TCGAgrow  0.3674    1.4440   0.2077  1.769  0.07694 . 
## Onco     -0.6667    0.5134   0.2402 -2.776  0.00551 **
## Stage2    0.8442    2.3261   0.4839  1.744  0.08109 . 
## Stage3   -0.9671    0.3802   1.0807 -0.895  0.37087   
## Stage4    2.0347    7.6502   0.6615  3.076  0.00210 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAgrow    1.4440     0.6925   0.96106    2.1695
## Onco        0.5134     1.9478   0.32062    0.8221
## Stage2      2.3261     0.4299   0.90094    6.0055
## Stage3      0.3802     2.6303   0.04572    3.1617
## Stage4      7.6502     0.1307   2.09232   27.9719
## 
## Concordance= 0.713  (se = 0.049 )
## Likelihood ratio test= 21.12  on 5 df,   p=8e-04
## Wald test            = 22.82  on 5 df,   p=4e-04
## Score (logrank) test = 26.15  on 5 df,   p=8e-05
model3=coxph(SurvOS~TCGAgrow+Mamma+Stage, data=newinfoTab)
summary(model3)
## Call:
## coxph(formula = SurvOS ~ TCGAgrow + Mamma + Stage, data = newinfoTab)
## 
##   n= 417, number of events= 27 
##    (2 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)   
## TCGAgrow  0.2715    1.3119   0.2019  1.345  0.17878   
## Mamma    -0.0464    0.9547   0.2459 -0.189  0.85035   
## Stage2    0.7497    2.1164   0.4874  1.538  0.12401   
## Stage3   -0.9201    0.3985   1.0809 -0.851  0.39463   
## Stage4    2.0398    7.6894   0.6658  3.064  0.00219 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAgrow    1.3119     0.7622   0.88314     1.949
## Mamma       0.9547     1.0475   0.58954     1.546
## Stage2      2.1164     0.4725   0.81415     5.502
## Stage3      0.3985     2.5095   0.04791     3.315
## Stage4      7.6894     0.1300   2.08531    28.354
## 
## Concordance= 0.664  (se = 0.058 )
## Likelihood ratio test= 13.76  on 5 df,   p=0.02
## Wald test            = 14.48  on 5 df,   p=0.01
## Score (logrank) test = 18.36  on 5 df,   p=0.003
modelos=coxph(SurvOS~TCGAgrow+Mamma+Onco+Stage, data=newinfoTab)
summary(modelos)
## Call:
## coxph(formula = SurvOS ~ TCGAgrow + Mamma + Onco + Stage, data = newinfoTab)
## 
##   n= 417, number of events= 27 
##    (2 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)   
## TCGAgrow  0.4387    1.5507   0.2195  1.999  0.04565 * 
## Mamma     0.3709    1.4491   0.2849  1.302  0.19290   
## Onco     -0.8402    0.4316   0.2690 -3.123  0.00179 **
## Stage2    0.7444    2.1052   0.4932  1.509  0.13123   
## Stage3   -0.9419    0.3899   1.0810 -0.871  0.38361   
## Stage4    1.8090    6.1043   0.6920  2.614  0.00895 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAgrow    1.5507     0.6449   1.00851    2.3845
## Mamma       1.4491     0.6901   0.82908    2.5328
## Onco        0.4316     2.3168   0.25476    0.7313
## Stage2      2.1052     0.4750   0.80068    5.5351
## Stage3      0.3899     2.5647   0.04686    3.2443
## Stage4      6.1043     0.1638   1.57250   23.6964
## 
## Concordance= 0.742  (se = 0.044 )
## Likelihood ratio test= 22.88  on 6 df,   p=8e-04
## Wald test            = 25.63  on 6 df,   p=3e-04
## Score (logrank) test = 28.26  on 6 df,   p=8e-05

Also try the stable signature:

model1=coxph(SurvOS~TCGAstab+Stage, data=newinfoTab)
summary(model1)
## Call:
## coxph(formula = SurvOS ~ TCGAstab + Stage, data = newinfoTab)
## 
##   n= 417, number of events= 27 
##    (2 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)   
## TCGAstab -0.1176    0.8890   0.2232 -0.527  0.59818   
## Stage2    0.7431    2.1025   0.4796  1.549  0.12128   
## Stage3   -0.8446    0.4297   1.0810 -0.781  0.43461   
## Stage4    2.0023    7.4064   0.6553  3.055  0.00225 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAstab    0.8890     1.1248   0.57400     1.377
## Stage2      2.1025     0.4756   0.82127     5.382
## Stage3      0.4297     2.3271   0.05165     3.575
## Stage4      7.4064     0.1350   2.05019    26.756
## 
## Concordance= 0.67  (se = 0.063 )
## Likelihood ratio test= 12.19  on 4 df,   p=0.02
## Wald test            = 13.06  on 4 df,   p=0.01
## Score (logrank) test = 16.83  on 4 df,   p=0.002
model2=coxph(SurvOS~TCGAstab+Onco+Stage, data=newinfoTab)
summary(model2)
## Call:
## coxph(formula = SurvOS ~ TCGAstab + Onco + Stage, data = newinfoTab)
## 
##   n= 417, number of events= 27 
##    (2 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)   
## TCGAstab -0.2364    0.7895   0.2126 -1.112  0.26621   
## Onco     -0.6851    0.5040   0.2530 -2.708  0.00677 **
## Stage2    0.8198    2.2700   0.4817  1.702  0.08879 . 
## Stage3   -0.8871    0.4118   1.0815 -0.820  0.41206   
## Stage4    1.8823    6.5685   0.6615  2.845  0.00443 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAstab    0.7895     1.2667   0.52039    1.1976
## Onco        0.5040     1.9840   0.30697    0.8276
## Stage2      2.2700     0.4405   0.88307    5.8353
## Stage3      0.4118     2.4281   0.04945    3.4300
## Stage4      6.5685     0.1522   1.79635   24.0184
## 
## Concordance= 0.703  (se = 0.055 )
## Likelihood ratio test= 19.3  on 5 df,   p=0.002
## Wald test            = 21.8  on 5 df,   p=6e-04
## Score (logrank) test = 24.82  on 5 df,   p=2e-04
model3=coxph(SurvOS~TCGAstab+Mamma+Stage, data=newinfoTab)
summary(model3)
## Call:
## coxph(formula = SurvOS ~ TCGAstab + Mamma + Stage, data = newinfoTab)
## 
##   n= 417, number of events= 27 
##    (2 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)   
## TCGAstab -0.1396    0.8697   0.2273 -0.614  0.53914   
## Mamma    -0.1076    0.8980   0.2465 -0.437  0.66246   
## Stage2    0.7775    2.1761   0.4856  1.601  0.10934   
## Stage3   -0.8495    0.4276   1.0811 -0.786  0.43202   
## Stage4    2.0485    7.7561   0.6618  3.095  0.00197 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAstab    0.8697     1.1498   0.55712     1.358
## Mamma       0.8980     1.1136   0.55393     1.456
## Stage2      2.1761     0.4595   0.84010     5.637
## Stage3      0.4276     2.3384   0.05139     3.559
## Stage4      7.7561     0.1289   2.11982    28.379
## 
## Concordance= 0.654  (se = 0.06 )
## Likelihood ratio test= 12.38  on 5 df,   p=0.03
## Wald test            = 13.21  on 5 df,   p=0.02
## Score (logrank) test = 17  on 5 df,   p=0.005
model3=coxph(SurvOS~TCGAstab+Mamma+Onco+Stage, data=newinfoTab)
summary(model3)
## Call:
## coxph(formula = SurvOS ~ TCGAstab + Mamma + Onco + Stage, data = newinfoTab)
## 
##   n= 417, number of events= 27 
##    (2 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)   
## TCGAstab -0.2170    0.8049   0.2157 -1.006  0.31446   
## Mamma     0.2149    1.2397   0.2689  0.799  0.42426   
## Onco     -0.7796    0.4586   0.2770 -2.814  0.00489 **
## Stage2    0.7567    2.1312   0.4896  1.546  0.12220   
## Stage3   -0.8906    0.4104   1.0816 -0.823  0.41029   
## Stage4    1.7519    5.7654   0.6893  2.542  0.01103 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAstab    0.8049     1.2424   0.52738    1.2285
## Mamma       1.2397     0.8067   0.73187    2.0998
## Onco        0.4586     2.1805   0.26648    0.7893
## Stage2      2.1312     0.4692   0.81640    5.5633
## Stage3      0.4104     2.4365   0.04927    3.4189
## Stage4      5.7654     0.1734   1.49325   22.2597
## 
## Concordance= 0.714  (se = 0.053 )
## Likelihood ratio test= 19.94  on 6 df,   p=0.003
## Wald test            = 23.21  on 6 df,   p=7e-04
## Score (logrank) test = 25.73  on 6 df,   p=3e-04

Combine the signatures such that it is Growing - Stable

model1=coxph(SurvOS~TCGAcomb+Stage, data=newinfoTab)
summary(model1)
## Call:
## coxph(formula = SurvOS ~ TCGAcomb + Stage, data = newinfoTab)
## 
##   n= 417, number of events= 27 
##    (2 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)   
## TCGAcomb  0.1683    1.1832   0.1364  1.234  0.21736   
## Stage2    0.7243    2.0634   0.4801  1.509  0.13138   
## Stage3   -0.8655    0.4208   1.0802 -0.801  0.42298   
## Stage4    1.9350    6.9244   0.6553  2.953  0.00315 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAcomb    1.1832     0.8451   0.90567     1.546
## Stage2      2.0634     0.4846   0.80520     5.288
## Stage3      0.4208     2.3762   0.05066     3.496
## Stage4      6.9244     0.1444   1.91683    25.014
## 
## Concordance= 0.668  (se = 0.059 )
## Likelihood ratio test= 13.4  on 4 df,   p=0.009
## Wald test            = 14.43  on 4 df,   p=0.006
## Score (logrank) test = 18.23  on 4 df,   p=0.001
model2=coxph(SurvOS~TCGAcomb+Onco+Stage, data=newinfoTab)
summary(model2)
## Call:
## coxph(formula = SurvOS ~ TCGAcomb + Onco + Stage, data = newinfoTab)
## 
##   n= 417, number of events= 27 
##    (2 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)   
## TCGAcomb  0.2451    1.2777   0.1312  1.868  0.06172 . 
## Onco     -0.7124    0.4904   0.2468 -2.887  0.00389 **
## Stage2    0.8306    2.2946   0.4834  1.718  0.08576 . 
## Stage3   -0.9125    0.4015   1.0812 -0.844  0.39870   
## Stage4    1.9323    6.9054   0.6552  2.949  0.00319 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAcomb    1.2777     0.7827   0.98805    1.6523
## Onco        0.4904     2.0390   0.30236    0.7955
## Stage2      2.2946     0.4358   0.88972    5.9180
## Stage3      0.4015     2.4905   0.04824    3.3423
## Stage4      6.9054     0.1448   1.91180   24.9423
## 
## Concordance= 0.709  (se = 0.053 )
## Likelihood ratio test= 21.38  on 5 df,   p=7e-04
## Wald test            = 23.23  on 5 df,   p=3e-04
## Score (logrank) test = 26.52  on 5 df,   p=7e-05
model3=coxph(SurvOS~TCGAcomb+Mamma+Stage, data=newinfoTab)
summary(model3)
## Call:
## coxph(formula = SurvOS ~ TCGAcomb + Mamma + Stage, data = newinfoTab)
## 
##   n= 417, number of events= 27 
##    (2 observations deleted due to missingness)
## 
##              coef exp(coef) se(coef)      z Pr(>|z|)   
## TCGAcomb  0.17166   1.18727  0.13577  1.264  0.20610   
## Mamma    -0.09884   0.90588  0.24536 -0.403  0.68706   
## Stage2    0.75782   2.13362  0.48682  1.557  0.11955   
## Stage3   -0.87486   0.41692  1.08048 -0.810  0.41811   
## Stage4    1.99668   7.36459  0.67001  2.980  0.00288 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAcomb    1.1873     0.8423   0.90988     1.549
## Mamma       0.9059     1.1039   0.56005     1.465
## Stage2      2.1336     0.4687   0.82174     5.540
## Stage3      0.4169     2.3985   0.05016     3.465
## Stage4      7.3646     0.1358   1.98078    27.382
## 
## Concordance= 0.661  (se = 0.059 )
## Likelihood ratio test= 13.57  on 5 df,   p=0.02
## Wald test            = 14.42  on 5 df,   p=0.01
## Score (logrank) test = 18.3  on 5 df,   p=0.003
model3=coxph(SurvOS~TCGAcomb+Mamma+Onco+Stage, data=newinfoTab)
summary(model3)
## Call:
## coxph(formula = SurvOS ~ TCGAcomb + Mamma + Onco + Stage, data = newinfoTab)
## 
##   n= 417, number of events= 27 
##    (2 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)   
## TCGAcomb  0.2543    1.2896   0.1323  1.922  0.05462 . 
## Mamma     0.2801    1.3232   0.2770  1.011  0.31201   
## Onco     -0.8417    0.4310   0.2746 -3.066  0.00217 **
## Stage2    0.7465    2.1096   0.4924  1.516  0.12954   
## Stage3   -0.9075    0.4035   1.0816 -0.839  0.40145   
## Stage4    1.7365    5.6776   0.6903  2.516  0.01188 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAcomb    1.2896     0.7755   0.99498    1.6714
## Mamma       1.3232     0.7557   0.76883    2.2774
## Onco        0.4310     2.3204   0.25161    0.7381
## Stage2      2.1096     0.4740   0.80360    5.5379
## Stage3      0.4035     2.4781   0.04844    3.3615
## Stage4      5.6776     0.1761   1.46751   21.9661
## 
## Concordance= 0.728  (se = 0.05 )
## Likelihood ratio test= 22.42  on 6 df,   p=0.001
## Wald test            = 25.3  on 6 df,   p=3e-04
## Score (logrank) test = 28.01  on 6 df,   p=9e-05

15.4.2 DFS survival

Run survival ~ Oncotype/Mamma/Growing + PAM50 + Stage

#SurvOS=Surv(newinfoTab$OSM, ifelse(newinfoTab$OSS=="LIVING", 0, 1))
SurvDFS=Surv(newinfoTab$DFM, ifelse(newinfoTab$DFS=="DiseaseFree", 0,ifelse(newinfoTab$DFS=="Recurred/Progressed", 1, NA) ))

model1=coxph(SurvDFS~TCGAgrow+Stage, data=newinfoTab)
summary(model1)
## Call:
## coxph(formula = SurvDFS ~ TCGAgrow + Stage, data = newinfoTab)
## 
##   n= 297, number of events= 30 
##    (122 observations deleted due to missingness)
## 
##            coef exp(coef) se(coef)     z Pr(>|z|)   
## TCGAgrow 0.5325    1.7032   0.2003 2.658  0.00786 **
## Stage2   0.1482    1.1598   0.4742 0.313  0.75465   
## Stage3   0.7633    2.1455   0.5768 1.323  0.18570   
## Stage4   1.1378    3.1200   0.8184 1.390  0.16445   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAgrow     1.703     0.5871    1.1501     2.522
## Stage2       1.160     0.8622    0.4578     2.938
## Stage3       2.145     0.4661    0.6927     6.645
## Stage4       3.120     0.3205    0.6273    15.517
## 
## Concordance= 0.678  (se = 0.045 )
## Likelihood ratio test= 11.87  on 4 df,   p=0.02
## Wald test            = 12.45  on 4 df,   p=0.01
## Score (logrank) test = 13.21  on 4 df,   p=0.01
model2=coxph(SurvDFS~TCGAgrow+Onco+Stage, data=newinfoTab)
summary(model2)
## Call:
## coxph(formula = SurvDFS ~ TCGAgrow + Onco + Stage, data = newinfoTab)
## 
##   n= 297, number of events= 30 
##    (122 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)   
## TCGAgrow  0.5488    1.7311   0.2011  2.729  0.00636 **
## Onco     -0.3221    0.7246   0.2402 -1.341  0.17998   
## Stage2    0.1603    1.1739   0.4734  0.339  0.73488   
## Stage3    0.7569    2.1316   0.5754  1.315  0.18835   
## Stage4    1.1400    3.1269   0.8162  1.397  0.16250   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAgrow    1.7311     0.5777    1.1672     2.568
## Onco        0.7246     1.3801    0.4525     1.160
## Stage2      1.1739     0.8519    0.4641     2.969
## Stage3      2.1316     0.4691    0.6902     6.584
## Stage4      3.1269     0.3198    0.6314    15.484
## 
## Concordance= 0.675  (se = 0.051 )
## Likelihood ratio test= 13.59  on 5 df,   p=0.02
## Wald test            = 13.64  on 5 df,   p=0.02
## Score (logrank) test = 14.74  on 5 df,   p=0.01
model3=coxph(SurvDFS~TCGAgrow+Mamma+Stage, data=newinfoTab)
summary(model3)
## Call:
## coxph(formula = SurvDFS ~ TCGAgrow + Mamma + Stage, data = newinfoTab)
## 
##   n= 297, number of events= 30 
##    (122 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)  
## TCGAgrow  0.5095    1.6644   0.2071  2.460   0.0139 *
## Mamma    -0.1051    0.9002   0.2530 -0.415   0.6779  
## Stage2    0.1770    1.1936   0.4793  0.369   0.7120  
## Stage3    0.7648    2.1485   0.5774  1.324   0.1854  
## Stage4    1.2554    3.5092   0.8657  1.450   0.1470  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAgrow    1.6644     0.6008    1.1091     2.498
## Mamma       0.9002     1.1108    0.5483     1.478
## Stage2      1.1936     0.8378    0.4666     3.053
## Stage3      2.1485     0.4654    0.6928     6.663
## Stage4      3.5092     0.2850    0.6432    19.146
## 
## Concordance= 0.679  (se = 0.044 )
## Likelihood ratio test= 12.04  on 5 df,   p=0.03
## Wald test            = 12.6  on 5 df,   p=0.03
## Score (logrank) test = 13.37  on 5 df,   p=0.02
modeldfs=coxph(SurvDFS~TCGAgrow+Mamma+Onco+Stage, data=newinfoTab)
summary(modeldfs)
## Call:
## coxph(formula = SurvDFS ~ TCGAgrow + Mamma + Onco + Stage, data = newinfoTab)
## 
##   n= 297, number of events= 30 
##    (122 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)   
## TCGAgrow  0.5752    1.7774   0.2167  2.654  0.00795 **
## Mamma     0.1001    1.1052   0.2934  0.341  0.73306   
## Onco     -0.3749    0.6873   0.2852 -1.315  0.18865   
## Stage2    0.1335    1.1429   0.4799  0.278  0.78079   
## Stage3    0.7467    2.1100   0.5757  1.297  0.19467   
## Stage4    1.0304    2.8022   0.8769  1.175  0.23995   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAgrow    1.7774     0.5626    1.1624     2.718
## Mamma       1.1052     0.9048    0.6219     1.964
## Onco        0.6873     1.4549    0.3930     1.202
## Stage2      1.1429     0.8750    0.4462     2.927
## Stage3      2.1100     0.4739    0.6827     6.522
## Stage4      2.8022     0.3569    0.5025    15.628
## 
## Concordance= 0.677  (se = 0.052 )
## Likelihood ratio test= 13.71  on 6 df,   p=0.03
## Wald test            = 13.67  on 6 df,   p=0.03
## Score (logrank) test = 14.82  on 6 df,   p=0.02

Also try the growing signature:

model1=coxph(SurvDFS~TCGAstab+Stage, data=newinfoTab)
summary(model1)
## Call:
## coxph(formula = SurvDFS ~ TCGAstab + Stage, data = newinfoTab)
## 
##   n= 297, number of events= 30 
##    (122 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)  
## TCGAstab -0.1088    0.8970   0.2177 -0.499   0.6175  
## Stage2    0.3715    1.4499   0.4690  0.792   0.4283  
## Stage3    1.0529    2.8661   0.5661  1.860   0.0629 .
## Stage4    1.3410    3.8229   0.8205  1.634   0.1022  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAstab     0.897     1.1149    0.5854     1.374
## Stage2       1.450     0.6897    0.5783     3.635
## Stage3       2.866     0.3489    0.9449     8.693
## Stage4       3.823     0.2616    0.7656    19.090
## 
## Concordance= 0.628  (se = 0.071 )
## Likelihood ratio test= 5  on 4 df,   p=0.3
## Wald test            = 5.5  on 4 df,   p=0.2
## Score (logrank) test = 5.93  on 4 df,   p=0.2
model2=coxph(SurvDFS~TCGAstab+Onco+Stage, data=newinfoTab)
summary(model2)
## Call:
## coxph(formula = SurvDFS ~ TCGAstab + Onco + Stage, data = newinfoTab)
## 
##   n= 297, number of events= 30 
##    (122 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)  
## TCGAstab -0.1573    0.8545   0.2182 -0.721    0.471  
## Onco     -0.3203    0.7260   0.2473 -1.295    0.195  
## Stage2    0.3435    1.4099   0.4713  0.729    0.466  
## Stage3    0.9986    2.7145   0.5724  1.745    0.081 .
## Stage4    1.1683    3.2165   0.8389  1.393    0.164  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAstab    0.8545     1.1703    0.5571     1.311
## Onco        0.7260     1.3775    0.4471     1.179
## Stage2      1.4099     0.7093    0.5598     3.551
## Stage3      2.7145     0.3684    0.8841     8.335
## Stage4      3.2165     0.3109    0.6213    16.650
## 
## Concordance= 0.625  (se = 0.072 )
## Likelihood ratio test= 6.61  on 5 df,   p=0.3
## Wald test            = 7.49  on 5 df,   p=0.2
## Score (logrank) test = 7.93  on 5 df,   p=0.2
model3=coxph(SurvDFS~TCGAstab+Mamma+Stage, data=newinfoTab)
summary(model3)
## Call:
## coxph(formula = SurvDFS ~ TCGAstab + Mamma + Stage, data = newinfoTab)
## 
##   n= 297, number of events= 30 
##    (122 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)  
## TCGAstab -0.1710    0.8428   0.2217 -0.771   0.4405  
## Mamma    -0.3033    0.7383   0.2478 -1.224   0.2209  
## Stage2    0.3992    1.4906   0.4709  0.848   0.3967  
## Stage3    1.0049    2.7316   0.5699  1.763   0.0778 .
## Stage4    1.5634    4.7752   0.8418  1.857   0.0633 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAstab    0.8428     1.1865    0.5458     1.301
## Mamma       0.7383     1.3544    0.4543     1.200
## Stage2      1.4906     0.6709    0.5922     3.752
## Stage3      2.7316     0.3661    0.8940     8.346
## Stage4      4.7752     0.2094    0.9171    24.864
## 
## Concordance= 0.646  (se = 0.073 )
## Likelihood ratio test= 6.46  on 5 df,   p=0.3
## Wald test            = 6.97  on 5 df,   p=0.2
## Score (logrank) test = 7.43  on 5 df,   p=0.2
model3=coxph(SurvDFS~TCGAstab+Mamma+Onco+Stage, data=newinfoTab)
summary(model3)
## Call:
## coxph(formula = SurvDFS ~ TCGAstab + Mamma + Onco + Stage, data = newinfoTab)
## 
##   n= 297, number of events= 30 
##    (122 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)  
## TCGAstab -0.1831    0.8327   0.2207 -0.830   0.4068  
## Mamma    -0.1949    0.8229   0.2804 -0.695   0.4870  
## Onco     -0.2271    0.7968   0.2823 -0.804   0.4212  
## Stage2    0.3705    1.4484   0.4732  0.783   0.4337  
## Stage3    0.9906    2.6929   0.5719  1.732   0.0832 .
## Stage4    1.3673    3.9249   0.8830  1.548   0.1215  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAstab    0.8327     1.2009    0.5403     1.283
## Mamma       0.8229     1.2152    0.4750     1.426
## Onco        0.7968     1.2550    0.4582     1.386
## Stage2      1.4484     0.6904    0.5730     3.661
## Stage3      2.6929     0.3714    0.8779     8.260
## Stage4      3.9249     0.2548    0.6953    22.154
## 
## Concordance= 0.636  (se = 0.073 )
## Likelihood ratio test= 7.1  on 6 df,   p=0.3
## Wald test            = 7.8  on 6 df,   p=0.3
## Score (logrank) test = 8.32  on 6 df,   p=0.2

Combine the signatures such that it is Growing - Stable

model1=coxph(SurvDFS~TCGAcomb+Stage, data=newinfoTab)
summary(model1)
## Call:
## coxph(formula = SurvDFS ~ TCGAcomb + Stage, data = newinfoTab)
## 
##   n= 297, number of events= 30 
##    (122 observations deleted due to missingness)
## 
##            coef exp(coef) se(coef)     z Pr(>|z|)  
## TCGAcomb 0.3266    1.3862   0.1456 2.243   0.0249 *
## Stage2   0.1817    1.1993   0.4746 0.383   0.7017  
## Stage3   0.8415    2.3197   0.5711 1.473   0.1407  
## Stage4   1.0605    2.8879   0.8260 1.284   0.1992  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAcomb     1.386     0.7214    1.0421     1.844
## Stage2       1.199     0.8338    0.4731     3.040
## Stage3       2.320     0.4311    0.7574     7.105
## Stage4       2.888     0.3463    0.5721    14.577
## 
## Concordance= 0.656  (se = 0.062 )
## Likelihood ratio test= 9.82  on 4 df,   p=0.04
## Wald test            = 10.3  on 4 df,   p=0.04
## Score (logrank) test = 10.92  on 4 df,   p=0.03
model2=coxph(SurvDFS~TCGAcomb+Onco+Stage, data=newinfoTab)
summary(model2)
## Call:
## coxph(formula = SurvDFS ~ TCGAcomb + Onco + Stage, data = newinfoTab)
## 
##   n= 297, number of events= 30 
##    (122 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)  
## TCGAcomb  0.3713    1.4496   0.1494  2.485   0.0129 *
## Onco     -0.3967    0.6725   0.2446 -1.621   0.1049  
## Stage2    0.1481    1.1596   0.4755  0.311   0.7554  
## Stage3    0.7555    2.1287   0.5766  1.310   0.1901  
## Stage4    0.9863    2.6812   0.8219  1.200   0.2302  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAcomb    1.4496     0.6898    1.0817     1.943
## Onco        0.6725     1.4869    0.4164     1.086
## Stage2      1.1596     0.8623    0.4567     2.945
## Stage3      2.1287     0.4698    0.6875     6.591
## Stage4      2.6812     0.3730    0.5354    13.427
## 
## Concordance= 0.627  (se = 0.069 )
## Likelihood ratio test= 12.32  on 5 df,   p=0.03
## Wald test            = 12.57  on 5 df,   p=0.03
## Score (logrank) test = 13.42  on 5 df,   p=0.02
model3=coxph(SurvDFS~TCGAcomb+Mamma+Stage, data=newinfoTab)
summary(model3)
## Call:
## coxph(formula = SurvDFS ~ TCGAcomb + Mamma + Stage, data = newinfoTab)
## 
##   n= 297, number of events= 30 
##    (122 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)  
## TCGAcomb  0.3222    1.3802   0.1440  2.237   0.0253 *
## Mamma    -0.2480    0.7804   0.2384 -1.040   0.2982  
## Stage2    0.2179    1.2435   0.4766  0.457   0.6475  
## Stage3    0.8069    2.2410   0.5740  1.406   0.1598  
## Stage4    1.3004    3.6709   0.8577  1.516   0.1295  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAcomb    1.3802     0.7245    1.0407     1.830
## Mamma       0.7804     1.2815    0.4891     1.245
## Stage2      1.2435     0.8042    0.4887     3.164
## Stage3      2.2410     0.4462    0.7276     6.902
## Stage4      3.6709     0.2724    0.6834    19.719
## 
## Concordance= 0.659  (se = 0.062 )
## Likelihood ratio test= 10.88  on 5 df,   p=0.05
## Wald test            = 11.44  on 5 df,   p=0.04
## Score (logrank) test = 12.04  on 5 df,   p=0.03
model3=coxph(SurvDFS~TCGAcomb+Mamma+Onco+Stage, data=newinfoTab)
summary(model3)
## Call:
## coxph(formula = SurvDFS ~ TCGAcomb + Mamma + Onco + Stage, data = newinfoTab)
## 
##   n= 297, number of events= 30 
##    (122 observations deleted due to missingness)
## 
##              coef exp(coef) se(coef)      z Pr(>|z|)  
## TCGAcomb  0.36521   1.44082  0.15044  2.428   0.0152 *
## Mamma    -0.07269   0.92989  0.27298 -0.266   0.7900  
## Onco     -0.35758   0.69937  0.28588 -1.251   0.2110  
## Stage2    0.16343   1.17754  0.47896  0.341   0.7329  
## Stage3    0.76071   2.13980  0.57642  1.320   0.1869  
## Stage4    1.06660   2.90548  0.87537  1.218   0.2231  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## TCGAcomb    1.4408     0.6940    1.0729     1.935
## Mamma       0.9299     1.0754    0.5446     1.588
## Onco        0.6994     1.4299    0.3994     1.225
## Stage2      1.1775     0.8492    0.4606     3.011
## Stage3      2.1398     0.4673    0.6914     6.623
## Stage4      2.9055     0.3442    0.5225    16.156
## 
## Concordance= 0.629  (se = 0.068 )
## Likelihood ratio test= 12.39  on 6 df,   p=0.05
## Wald test            = 12.65  on 6 df,   p=0.05
## Score (logrank) test = 13.5  on 6 df,   p=0.04

Create a forest plot for the above: First DFS

data1=data.frame(Feat=c(  "Feat","Stage", "","","","Signature", "Mammaprint", "OncotypeDX"),
                Cat=c( "Val","1", "2", "3", "4","Signature", "Mammaprint","OncotypeDX"),
                N=c("N" ,table(newinfoTab$Stage), rep(nrow(newinfoTab), 3)),
                HR=c("HR",  1, round(summary(modeldfs)$coefficients[4:6, 2], 2), round(summary(modeldfs)$coefficients[1:3, 2], 2)),
                p=c("p" ,"NA", round(summary(modeldfs)$coefficients[4:6, 5], 2), round(summary(modeldfs)$coefficients[1:3, 5], 2)) )

mdata1=data.frame(rbind(NA,NA, round(summary(modeldfs)$conf.int[c(4:6, 1:3) ,c(1,3:4)], 2)))
#mdata=rbind(mdata, NA,  round(summary(modeldfs)$conf.int[4:6 ,c(1,3:4)], 2))

#pdf("figure-outputs/Figure5_CD74_forestplot.pdf", width=5, height=5)

forestplot(data1, mdata1, xlog=T,
           boxsize=0.2)

Next OS

data1=data.frame(Feat=c(  "Feat","Stage", "","","","Signature", "Mammaprint", "OncotypeDX"),
                Cat=c( "Val","1", "2", "3", "4","Signature", "Mammaprint","OncotypeDX"),
                N=c("N" ,table(newinfoTab$Stage), rep(nrow(newinfoTab), 3)),
                HR=c("HR",  1, round(summary(modelos)$coefficients[4:6, 2], 2), round(summary(modeldfs)$coefficients[1:3, 2], 2)),
                p=c("p" ,"NA", round(summary(modelos)$coefficients[4:6, 5], 2), round(summary(modeldfs)$coefficients[1:3, 5], 2)) )

mdata1=data.frame(rbind(NA,NA, round(summary(modelos)$conf.int[c(4:6, 1:3) ,c(1,3:4)], 2)))
#mdata=rbind(mdata, NA,  round(summary(modeldfs)$conf.int[4:6 ,c(1,3:4)], 2))

#pdf("figure-outputs/Figure5_CD74_forestplot.pdf", width=5, height=5)

forestplot(data1, mdata1, xlog=T,
           boxsize=0.2)

15.4.3 TCGA: LumA high vs low signatures

We can pull out the samples here which have high vs low signature (in the lumA group) and perform differential gene expression analysis on these cases:

newinfoTab$TCGAvalcut=cut(newinfoTab$TCGAgrow,  quantile(newinfoTab$TCGAgrow, c(0, 0.33, 0.67, 1) ), c("L", "M", "O"))
newinfoTab$TCGAvalcut[184]="L"

colDataTCGA=newinfoTab#[which(newinfoTab$TCGAvalcut!="M"), ]

TCGAdeseq=DESeqDataSetFromMatrix(round(TCGArsem2[ ,match(colDataTCGA$pat, colnames(TCGArsem2))]), colDataTCGA, design=~TCGAvalcut)

TCGAdeseq=DESeq(TCGAdeseq)

TCGAdeseqRes=results(TCGAdeseq, contrast=c("TCGAvalcut", "O", "L"))

#write.csv(TCGAdeseqRes, file="nature-tables/5J_TCGA.csv")

TCGAdeseqRes2=TCGAdeseqRes[which(TCGAdeseqRes$padj<0.05 & abs(TCGAdeseqRes$log2FoldChange)>1.5 
                                 & TCGAdeseqRes$baseMean>100), ]

A1=as.data.frame(TCGAdeseqRes2)

DT::datatable(A1, rownames=T, class='cell-border stripe',
          extensions="Buttons", options=list(dom="Bfrtip", buttons=c('csv', 'excel'), Xscroll=T))

write.csv(TCGAdeseqRes, file="nature-tables/TCGA_stable_growing.csv")

We can plot the differential genes here in a volcano plot:

#pdf("figure-outputs/Fig5J_TCGA_deg.pdf", height=8, width=8)
with(TCGAdeseqRes, plot(log2FoldChange, -log10(padj), pch=20, main="Volcano plot: Low exp (-) vs high (+)", cex=1.0, xlab=bquote(~Log[2]~fold~change), ylab=bquote(~-log[10]~Q~value)))
with(subset(TCGAdeseqRes, padj<0.05 & abs(log2FoldChange)>1.5 & baseMean>100), points(log2FoldChange, -log10(padj), pch=20, col="red", cex=0.5))
text(TCGAdeseqRes2$log2FoldChange+0.02, -log10(TCGAdeseqRes2$padj)+0.05, rownames(TCGAdeseqRes2))

Figure 15.15: yet another volcano plot

#with(subset(TCGAdeseqRes, padj<0.05 & abs(log2FoldChange)>2), text(log2FoldChange+0.05, -log10(padj)+0.05, rownames(TCGAdeseqRes), pch=20, col="red", cex=0.75))
#dev.off()

vstTCGA=vst(assay(TCGAdeseq))

ColSideCols=brewer.pal(3, "Blues")[factor(TCGAdeseq$TCGAvalcut)]

tx2=rownames(TCGAdeseqRes2)[which(rownames(TCGAdeseqRes2)%in%AllImmGenes)]
ax1=vstTCGA[ match(tx2, rownames(vstTCGA)),  ]
heatmap.2(ax1, scale="row", trace="none", ColSideColors = ColSideCols, col=RdBu[11:1], main="LumA TCGA")

Figure 15.16: because someone will ask for it


#pdf("figure-outputs/Figure5J_XX_heatmap_LumA_TCGA.pdf", height=15, width=10)

heatmap.2(vstTCGA[ rownames(TCGAdeseqRes2),  ], scale="row", trace="none", ColSideColors = ColSideCols, col=RdBu[11:1], main="LumA TCGA")

Figure 15.17: because someone will ask for it

also check the directions by looking at CXCl13, which is an overexpressed gene:

boxplot(vstTCGA["CXCL13", ]~TCGAdeseq$TCGAvalcut)

Check if there is a difference in checkpoint proteins between the two:

ColSideCols=brewer.pal(3, "Blues")[factor(TCGAdeseq$TCGAvalcut[which(TCGAdeseq$TCGAvalcut!="M")])]
m1=match(unlist(ImmSuppAPC$Inh), rownames(vstTCGA))
m2=match(unlist(ImmSuppAPC$Act), rownames(vstTCGA))
m3=match(c("IFNG", "CTLA4", "GZMB"), rownames(vstTCGA))
ax1=vstTCGA[ na.omit(c(m1, m2, m3)),  which(TCGAdeseq$TCGAvalcut!="M")]
RowSideCols=c(rep("red", length(na.omit(m1))), rep("blue", length(na.omit(m2))), rep("forestgreen",3))

#pdf("figure-outputs/Figure5J_XX_heatmap_LumA_TCGA.pdf", height=15, width=10)

heatmap.2(ax1, scale="row", trace="none", ColSideColors = ColSideCols, col=RdBu[11:1], main="LumA TCGA" , RowSideColors = RowSideCols,hclustfun = hclust.ave, Rowv = NA)

Run gsea here


hits=rownames(TCGAdeseqRes2)
fcTab=TCGAdeseqRes$log2FoldChange

names(fcTab)=rownames(TCGAdeseq)
  gscaTCGA=GSCA(listOfGeneSetCollections=ListGSC,geneList=fcTab, hits = hits)
   gscaTCGA<- preprocess( gscaTCGA, species="Hs", initialIDs="SYMBOL",
                    keepMultipleMappings=TRUE, duplicateRemoverMethod="max",
                    orderAbsValue=FALSE)
## -Preprocessing for input gene list and hit list ...
## --Removing genes without values in geneList ...
## --Removing duplicated genes ...
## --Converting annotations ...
## -- 2816 genes (out of 20060) could not be mapped to any identifier, and were removed from the data. 
## -- 2 genes (out of 70) could not be mapped to any identifier, and were removed from the data. 
## --Ordering Gene List decreasingly ...
## -Preprocessing complete!
   gscaTCGA <- analyze( gscaTCGA, 
                 para=list(pValueCutoff=0.05, pAdjustMethod="BH",
                           nPermutations=100, minGeneSetSize=5,
                           exponent=1), 
                           doGSOA = F)
## --21 gene sets don't have >= 5 overlapped genes with universe in gene set collection named c2List!
## --256 gene sets don't have >= 5 overlapped genes with universe in gene set collection named c5BP!
## --73 gene sets don't have >= 5 overlapped genes with universe in gene set collection named c5MF!
## --60 gene sets don't have >= 5 overlapped genes with universe in gene set collection named c5CC!
## --301 gene sets don't have >= 5 overlapped genes with universe in gene set collection named MetPathway!
## -Performing gene set enrichment analysis using HTSanalyzeR2... 
## --Calculating the permutations ... 
## -Gene set enrichment analysis using HTSanalyzeR2 complete 
## ==============================================
   
save( gscaTCGA, file="figure-outputs/TCGA_hairball.RData")
   
TermsA=sapply(strsplit(rownames(gscaTCGA@result$GSEA.results$ProcessNetworks), "_"), function(x) x[2])
TermsA[which(is.na(TermsA))]=substr(rownames(gscaTCGA@result$GSEA.results$ProcessNetworks)[which(is.na(TermsA))], 2, 50)

## check whether this runs:
gscaTCGA@result$GSEA.results$ProcessNetworks$Gene.Set.Term=TermsA
viewEnrichMap(gscaTCGA, gscs=c("ProcessNetworks"),
              allSig = TRUE, gsNameType="term" )

Figure 15.18: pointless TCGA hairball


write.csv(gscaTCGA@result$GSEA.results$ProcessNetworks, file="nature-tables/5k_TCGA_lumA_GSEA.csv")