Chapter 5 Spatial statistics

Below, we use three different metrics to compare spatial distributions:

  • k-nearest neighbour distances
  • the interacting fraction
  • morisita-horn distances

These are compared to manual inspection of the result

5.1 knn-Distances:

The k-nearest neighbour distances looks at the average distance from a given cell type of class A to a cell type of class B. In this section, the reference class A is the CD8 T cell, and we will look at the mean distance to SMA, Epcam, double positive and unclassified cells in each image.

To account for potential fluctuations due to misclassified cells, or isolated single cells, k values of 1, 3, 5 will be used. I.e. for each cell, we will compute the mean distance from each Cd8Tcell to its 1, 3, and 5 nearest neighbours.

5.1.1 Comparison to manual classification

Overall, we see that the differences in infiltrating vs restricted are similar. We see statistical differences (using anova followed by Tukey’s test) between:

  • epcam and SMA-epcam in both cases (higher distances to EpCAM on average)
  • SMA-Epcam to SMA (CD8s are closer to SMA+)
  • Unclass to Epcam-SMA (CD8s closer to unclass)

In the infiltrating case:

  • Unclass to Epcam (CD8s closer to unclass, borderline significant)

In the restricted cases, we see:

  • Unclass to SMA (higher distance to unclass in the restricted case)
  • SMA to epcam (CD8s are closer to the SMA)

This last result is consistent with what we expect for a CD8+ cell which is stroma-restricted.

knnMelt=melt(WSIknn, measure.vars=c("KS.pval"))

knnTemp1=knnMelt[knnMelt$CellType=="CD8" & knnMelt$NearestCellType%in%c("EpCAM", "SMA", "EpCAM: SMA", "Unclass"), ]
knnTemp1$SpatialManual=Cdata$Pathologist.CD8.Spatial[match(knnTemp1$L1, gsub("_", "", Cdata$TumorID))]

# compute p values?
ptest=sapply(levels(knnTemp1$knn), function(x) wilcox.test(knnTemp1$MeanDistance[which(knnTemp1$knn==x & knnTemp1$NearestCellType=="EpCAM" & knnTemp1$SpatialManual=="Infiltrating")], knnTemp1$MeanDistance[which(knnTemp1$knn==x & knnTemp1$NearestCellType=="SMA" & knnTemp1$SpatialManual=="Infiltrating")])$p.value)

fit1=aov(MeanDistance~NearestCellType+knn, data=knnTemp1[knnTemp1$SpatialManual=="Infiltrating", ])
fit2=aov(MeanDistance~NearestCellType+knn, data=knnTemp1[knnTemp1$SpatialManual=="restricted", ])


fit1b=aov(MedianDistance~NearestCellType+knn, data=knnTemp1[knnTemp1$SpatialManual=="Infiltrating", ])
fit2b=aov(MedianDistance~NearestCellType+knn, data=knnTemp1[knnTemp1$SpatialManual=="restricted", ])

#pdf(sprintf("rslt/WSI-analysis/knn_distances_vs_manualspatial_%s.pdf", Sys.Date()), height=7, width=12)

p<-ggplot(knnTemp1, aes(x=NearestCellType, y=MeanDistance, fill=SpatialManual))+geom_boxplot(outlier.shape=NA)+facet_grid(~knn+SpatialManual)+scale_y_continuous(trans='log10')+theme(axis.text.x = element_text(angle = 90, hjust = 1))+scale_color_manual(values = c("#e41a1c", "#377eb8"),na.value="black")+ggtitle("CD8-CelltypeX spatial distributions: mean values")

print(p)

par(mfrow=c(1,2), oma=c(0, 0, 2, 0))
plot(TukeyHSD(fit1, "NearestCellType"), las=2)
title("Infiltrating Mean Distance from CD8", line=3.3)
plot(TukeyHSD(fit2, "NearestCellType"), las=2)
title("Restricted Mean Distance from CD8", line=3.3)

Note above the infiltrating samples have shorter distances to EpCAM cells relative to SMA cells compared to restricted samples. (Tukey’s test shows that SMA-EPCAM distances for inf. are large but small in resis.)

5.1.2 Associations with outcome to treatment

We can also see if there is an association between these distances with growth and treatment

Treatment:

  • CD8 cells in PDL1 sample are further away to SMA+ cells and EpCAM+ (compared to vehicle or double agent)
  • CD8 cells in LY treated samples are further away from unclassified cells (compared to any of the other treatments)

#pdf("~/Desktop/4C-knn-summary-treatment.pdf", height=5, width=7)

knnTemp1$Treatment=Cdata$Treatment[match(knnTemp1$L1, gsub("_", "", Cdata$TumorID))]

fit1=aov(MedianDistance~Treatment, data=knnTemp1[knnTemp1$NearestCellType=="EpCAM" & knnTemp1$knn=="knn3", ])
fit2=aov(MedianDistance~Treatment, data=knnTemp1[knnTemp1$NearestCellType=="SMA"& knnTemp1$knn=="knn3", ])
fit3=aov(MedianDistance~Treatment, data=knnTemp1[knnTemp1$NearestCellType=="EpCAM: SMA"& knnTemp1$knn=="knn3", ])
fit4=aov(MedianDistance~Treatment, data=knnTemp1[knnTemp1$NearestCellType=="Unclass"& knnTemp1$knn=="knn3", ])

knnTemp1$Growth=Cdata$Tumor.Growth[match(knnTemp1$L1, gsub("_", "", Cdata$TumorID))]

fit1b=aov(MeanDistance~Growth, data=knnTemp1[knnTemp1$NearestCellType=="EpCAM"& knnTemp1$knn=="knn3", ])
fit2b=aov(MeanDistance~Growth, data=knnTemp1[knnTemp1$NearestCellType=="SMA"& knnTemp1$knn=="knn3", ])
fit3b=aov(MeanDistance~Growth, data=knnTemp1[knnTemp1$NearestCellType=="EpCAM: SMA"& knnTemp1$knn=="knn3", ])
fit4b=aov(MeanDistance~Growth, data=knnTemp1[knnTemp1$NearestCellType=="Unclass"& knnTemp1$knn=="knn3", ])

#pdf(sprintf("rslt/WSI-analysis/knn_distances_vs_treatment_growth_%s.pdf", Sys.Date()), height=7, width=12)

p<-ggplot(knnTemp1, aes(col=Treatment, y=MedianDistance, x=Treatment))+geom_boxplot(outlier.shape=NA)+geom_point(position=position_jitterdodge(),alpha=0.3)+facet_grid(~knn+NearestCellType)+scale_y_continuous(trans='log10')+theme(axis.text.x = element_text(angle = 90, hjust = 1))+scale_color_manual(values = ColMerge[ ,1],na.value="black")+theme_bw()
print(p)
knn Distances

Figure 5.1: knn Distances

Below, check whether any of the above are significant using Tukey’s ad-hoc test. Note that not many values cross 0 here, except for the LY samples for stromal cells

par(mfrow=c(2,2), oma=c(0, 0, 2, 0))
tukey.test <- TukeyHSD(fit1)
plot(TukeyHSD(fit1, "Treatment"), las=2)
title("EpCAM Mean Distance", line=3.3)
plot(TukeyHSD(fit2, "Treatment"), las=2)
title("SMA Mean Distance", line=3.3)
plot(TukeyHSD(fit3, "Treatment"), las=2)
title("EpCAM:SMA Mean Distance", line=3.3)
plot(TukeyHSD(fit4, "Treatment"), las=2)
title("Unclass Distance", line=3.3)

#dev.off()

summary(fit1)
##             Df Sum Sq Mean Sq F value Pr(>F)
## Treatment    3    365   121.7   0.307   0.82
## Residuals   54  21435   396.9
summary(fit2)
##             Df   Sum Sq Mean Sq F value Pr(>F)
## Treatment    3  2147233  715744   0.907  0.444
## Residuals   52 41037326  789179
summary(fit3)
##             Df   Sum Sq Mean Sq F value Pr(>F)
## Treatment    3  1969380  656460   0.467  0.707
## Residuals   29 40735615 1404676
summary(fit4)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Treatment    3   5763  1920.9    2.68 0.0559 .
## Residuals   54  38700   716.7                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

knnTemp1b=knnTemp1[which(knnTemp1$knn=="knn3"), ]

# save data to file
knnTemp1$sample=Cdata$NewID[match(knnTemp1$L1, gsub("_", "", Cdata$TumorID))]
DT::datatable(knnTemp1[which(knnTemp1$knn=="knn3"), c(2:5, 10:13)], rownames=F, class='cell-border stripe',
          extensions="Buttons", options=list(dom="Bfrtip", buttons=c('csv', 'excel'), scrollX=T))
#write.csv(knnTemp1[which(knnTemp1$knn=="knn3"), c(2:5, 10:13)], file="nature-tables/Fig4c_Ext4e.csv")

We can refine this comaprison to just knn3. We compare to the vehicle using a wilcox.test below:

knnTemp1b=knnTemp1[which(knnTemp1$knn=="knn3"), ]
ctypes=unique(knnTemp1b$NearestCellType)
t2=knnTemp1b[which(knnTemp1b$CellType=="CD8" & knnTemp1b$knn=="knn3") , ]

pval2=sapply(ctypes, function(x) wilcox.test(t2$MeanDistance[t2$NearestCellType==x & t2$Treatment=="Vehicle" ], t2$MeanDistance[t2$NearestCellType==x & t2$Treatment=="LY"])$p.value)

pval3=sapply(ctypes, function(x) wilcox.test(t2$MeanDistance[t2$NearestCellType==x & t2$Treatment=="Vehicle" ], t2$MeanDistance[t2$NearestCellType==x & t2$Treatment=="PDL1"])$p.value)

pval4=sapply(ctypes, function(x) wilcox.test(t2$MeanDistance[t2$NearestCellType==x & t2$Treatment=="Vehicle" ], t2$MeanDistance[t2$NearestCellType==x & t2$Treatment=="PDL1+LY"])$p.value)

pmelt=melt(cbind(LY=pval2, PDL1=pval3, 'PDL1+LY'=pval4))
colnames(pmelt)=c("NearestCellType","Treatment","label")
pmelt$label=round(pmelt$label, 2)
pmelt$value=0.8
pmelt$Treatment[which(pmelt$Treatment=="pval4")]="PDL1+LY"

p=ggplot(knnTemp1b[knnTemp1b$CellType=="CD8" , ], aes(x=Treatment, y=MeanDistance, col=Treatment))+facet_grid(~NearestCellType)+geom_boxplot()+ylab("knn3")+xlab("Distance in microns")+ggtitle("knn3 mean distance to cd8 cell ")+theme_bw()+geom_point(position=position_jitterdodge(),alpha=0.3)+scale_color_manual(values=c(ColMerge[ ,1], "black"))+scale_y_continuous(trans='log10')
p+geom_text(data=pmelt, mapping=aes(x=Treatment, y=0.8, label=label))

5.1.3 Growth

All 95% confidence lines cross 0, but it appears that stable cases have a closer unclass-CD8 interaction distance compared to growing.


p<-ggplot(knnTemp1, aes(col=Growth, y=MeanDistance, x=Growth))+geom_boxplot(outlier.shape=NA)+geom_point(position=position_jitterdodge(),alpha=0.3)+facet_grid(~knn+NearestCellType)+scale_y_continuous(trans='log10')+theme(axis.text.x = element_text(angle = 90, hjust = 1))+scale_color_manual(values = c(ColSizeb, "black"),na.value="black")+theme_bw()
print(p)

par(mfrow=c(2,2), oma=c(0, 0, 2, 0))
plot(TukeyHSD(fit1b, "Growth"), las=2)
title("EpCAM Mean Distance", line=3.3)
plot(TukeyHSD(fit2b, "Growth"), las=2)
title("SMA Mean Distance", line=3.3)
plot(TukeyHSD(fit3b, "Growth"), las=2)
title("EpCAM:SMA Mean Distance", line=3.3)
plot(TukeyHSD(fit4b, "Growth"), las=2)
title("Unclass Distance", line=3.3)


#dev.off()

summary(fit1b)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Growth       1   3209    3209   4.568 0.0374 *
## Residuals   51  35824     702                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 5 observations deleted due to missingness
summary(fit2b)
##             Df   Sum Sq Mean Sq F value Pr(>F)
## Growth       1   342252  342252   0.358  0.552
## Residuals   49 46806661  955238               
## 5 observations deleted due to missingness
summary(fit3b)
##             Df   Sum Sq Mean Sq F value Pr(>F)
## Growth       1   999209  999209    0.48  0.494
## Residuals   28 58307461 2082409               
## 3 observations deleted due to missingness
summary(fit4b)
##             Df Sum Sq Mean Sq F value Pr(>F)
## Growth       1   3469    3469   1.053   0.31
## Residuals   51 168073    3296               
## 5 observations deleted due to missingness

Can also plot the single result for knn=3

knnTemp1b=knnTemp1[which(knnTemp1$knn=="knn3"), ]
ctypes=unique(knnTemp1b$NearestCellType)
t2=knnTemp1b[which(knnTemp1b$CellType=="CD8" & knnTemp1b$knn=="knn3") , ]

pval2=sapply(ctypes, function(x) wilcox.test(t2$MeanDistance[t2$NearestCellType==x & t2$Growth=="growing" ], t2$MeanDistance[t2$NearestCellType==x & t2$Growth=="stable" ])$p.value)

ann_text=data.frame(Glabel=round(pval2,2), NearestCellType=(ctypes ), Growth="stable", value=0.7)

p=ggplot(knnTemp1b[knnTemp1b$CellType=="CD8" , ], aes(x=Growth, y=MeanDistance, col=Growth))+facet_grid(~NearestCellType)+geom_boxplot()+ylab("knn3")+xlab("Distance in microns")+ggtitle("kkn3 nearest neighbor distance to cd8 cell")+theme_bw()+geom_point(position=position_jitterdodge(),alpha=0.3)+scale_color_manual(values=c(ColSizeb, "black"))+scale_y_continuous(trans='log10')
p+geom_text(data=ann_text, mapping=aes(x=2, y=0.75, label=Glabel))

Is there a plot showing the different knn values? If there is, delete this section


Combine the Epithelial samples, and SMA/unclassified samples together: Is there a difference here?

knnTemp1b=knnTemp1[which(knnTemp1$knn=="knn1"), ]
knnTemp1b$CD8=WSIvalFracs[1, match(knnTemp1b$L1, colnames(WSIvalFracs))]
knnTemp1b$CD8frac=ifelse(knnTemp1b$CD8>0.1, "high", "low")
knnTemp1b$Ep=WSIvalFracs[2, match(knnTemp1b$L1, colnames(WSIvalFracs))]
knnTemp1b$Epfrac=ifelse(knnTemp1b$CD8>0.1, "high", "low")


knnTemp1c=knnTemp1b[knnTemp1b$NearestCellType%in%c("EpCAM"),  ]
knnTemp1d=knnTemp1b[knnTemp1b$NearestCellType%in%c("EpCAM: SMA"),  ]

knnEpMerge=merge(knnTemp1c, knnTemp1d[ ,c("MeanDistance", "MedianDistance", "variable", 
                                          "value","L1")], by.x="L1", by.y="L1", all=T)
knnEpMerge$EpDist2=ifelse(knnEpMerge$MedianDistance.x<knnEpMerge$MedianDistance.y, 
                          knnEpMerge$MedianDistance.x, knnEpMerge$MedianDistance.y)
knnEpMerge$EpDist2[which(is.na(knnEpMerge$EpDist2))]=knnEpMerge$MedianDistance.x[which(is.na(knnEpMerge$EpDist2))]

ggplot(knnEpMerge, aes(x=Growth , y=EpDist2, col=Growth))+geom_boxplot()+geom_point(position=position_jitterdodge(),alpha=0.3)+ylim(c(0, 200))+facet_grid(~CD8frac)

ggplot(knnEpMerge, aes(x=Treatment, y=EpDist2, col=Treatment))+facet_grid(~CD8frac)+geom_boxplot()+geom_point(position=position_jitterdodge(),alpha=0.3)+ylim(c(0, 200))

plot(knnEpMerge$CD8, knnEpMerge$EpDist2, col=factor(knnEpMerge$Growth))
text(knnEpMerge$CD8, knnEpMerge$EpDist2, knnEpMerge$L1)
plot(knnEpMerge$Ep, knnEpMerge$EpDist2, col=factor(knnEpMerge$Growth))
text(knnEpMerge$Ep, knnEpMerge$EpDist2, knnEpMerge$L1)
## what is this used for?
knnTempSumm=knnTemp1[which(knnTemp1$knn=="knn3"), ]
knnreshape=acast(knnTempSumm[ ,c("NearestCellType", "MeanDistance", "L1")], L1~NearestCellType, value.var="MeanDistance" )
knnreshape=data.frame(knnreshape)
knnreshape$EpMIN=ifelse(knnreshape$EpCAM..SMA<knnreshape$EpCAM, knnreshape$EpCAM..SMA, knnreshape$EpCAM)
knnreshape$EpMIN[which(is.na(knnreshape$EpMIN))]=knnreshape$EpCAM[which(is.na(knnreshape$EpMIN))]
knnreshape$EpStrRatio1=knnreshape$EpCAM/rowSums(knnreshape[ ,c("EpCAM..SMA", "SMA")], na.rm=T)
knnreshape$EpStrRatio2=knnreshape$EpCAM/(knnreshape$SMA)
knnreshape$EpStrRatio3=rowSums(knnreshape[ ,c("EpCAM..SMA", "EpCAM")], na.rm=T)/rowSums(knnreshape[ ,c("EpCAM..SMA", "SMA")], na.rm=T)
knnreshape$EpStrRatio4=rowSums(knnreshape[ ,c("EpCAM..SMA", "EpCAM")], na.rm=T)/(knnreshape$SMA)

5.2 The interacting fraction

The interacting fraction uses the knn-distances and determines the proportion of CD8 cells which are within a proximity of r um from celltype B.

5.2.1 Comparison to manual & select optimal r

Below are plots of the proportion of CD8 cells within an “interacting distance” as we increase r. This looks at both the interacting fraction of CD8 cells with Epcam+ and SMA+ cells. Lines are color coded according to the manual spatial-infiltration annotation.

We notice from the line plots for each single sample that the restricted samples generally have low interacting fractins with EpCAM and SMA compared to the infiltrating samples. In addition, there is a statistical difference in EpCAM measurements compared to SMA.

IFmelt=melt(WSIIF, measure.vars=c("IF"))

IFmelt$SpatialManual=Cdata$Pathologist.CD8.Spatial[match(IFmelt$L1, gsub("_", "", Cdata$TumorID))]
IFmelt$Treatment=Cdata$Treatment[match(IFmelt$L1, gsub("_", "", Cdata$TumorID))]
IFmelt$Growth=Cdata$Tumor.Growth[match(IFmelt$L1, gsub("_", "", Cdata$TumorID))]
#IFmelt$Growth=IFmelt$Growth
#IFmelt$Growth[grep("no data", IFmelt$Growth)]="no data"

IFmelt$Dist=(substr(IFmelt$Grid, 6, 7))
IFmelt$knn=substr(IFmelt$Grid, 1, 4)
IFmelt$sample=Cdata$NewID[match(IFmelt$L1, gsub("_", "", Cdata$TumorID))]

IFTempSumm=IFmelt[IFmelt$Grid=="knn3-15" & IFmelt$Reference=="CD8", ]
IFreshape=acast(IFTempSumm[ ,c("NearestNeighbor", "value", "L1")], L1~NearestNeighbor, value.var="value" )
IFreshape=data.frame(IFreshape)

IFreshape$EpMIN=ifelse(IFreshape$EpCAM..SMA<IFreshape$EpCAM, IFreshape$EpCAM..SMA, IFreshape$EpCAM)
IFreshape$EpMIN[which(is.na(IFreshape$EpMIN))]=IFreshape$EpCAM[which(is.na(IFreshape$EpMIN))]
IFreshape$EpStrRatio1=IFreshape$EpCAM/rowSums(IFreshape[ ,c("EpCAM..SMA", "SMA")], na.rm=T)
IFreshape$EpStrRatio2=IFreshape$EpCAM/(IFreshape$SMA)
IFreshape$EpStrRatio3=rowSums(IFreshape[ ,c("EpCAM..SMA", "EpCAM")], na.rm=T)/rowSums(IFreshape[ ,c("EpCAM..SMA", "SMA")], na.rm=T)
IFreshape$EpStrRatio4=rowSums(IFreshape[ ,c("EpCAM..SMA", "EpCAM")], na.rm=T)/(IFreshape$SMA)


IFreshape$Treatment=factor(IFTempSumm$Treatment[match(rownames(IFreshape), IFTempSumm$L1)])
IFreshape$Growth=factor(IFTempSumm$Growth[match(rownames(IFreshape), IFTempSumm$L1)])
IFreshape$Infil=factor(IFTempSumm$SpatialManual[match(rownames(IFreshape), IFTempSumm$L1)])
IFreshape$CD8frac=SummaryData$CD8Frac.WSI[match(rownames(IFreshape), rownames(SummaryData))]
IFreshape$TumSize=SummaryData$TumSize[match(rownames(IFreshape), rownames(SummaryData))]

####
# line plots to see the best separation between infiltrating and restricted
###

#pdf(sprintf("rslt/WSI-analysis/interacting_fraction_compared_manual_%s.pdf", Sys.Date()), height=8, width=14)

IFmelt2=IFmelt[IFmelt$NearestNeighbor=="EpCAM" & IFmelt$Reference=="CD8" , ]
IFmelt2$label=IFmelt2$L1
IFmelt2$label[which(IFmelt2$Dist!=30)]=NA
p<-ggplot(IFmelt2, aes(x=Dist, y=value, col=SpatialManual, group=L1, label=label))+facet_grid(~knn)+geom_line(aes(group=L1))+ylab("Interacting Fraction")+xlab("Distance in microns")+ggtitle("CD8-EpCAM interacting fraction: spatial manual")+geom_label()
print(p)


#IFmelt2$Dist=as.numeric(IFmelt2$Dist)

p<-ggplot(IFmelt2, aes(x=Dist, y=value, col=SpatialManual, label=label))+geom_boxplot()+ylab("Interacting Fraction")+xlab("Distance in microns")+facet_grid(~knn)+ggtitle("CD8-EpCAM knn3")+stat_smooth()
print(p)


IFmelt2=IFmelt[IFmelt$NearestNeighbor=="SMA" & IFmelt$Reference=="CD8" , ]
IFmelt2$label=IFmelt2$L1
IFmelt2$label[which(IFmelt2$Dist!=30)]=NA

p<-ggplot(IFmelt2, aes(x=Dist, y=value, col=SpatialManual, group=L1, label=label))+facet_grid(~knn)+geom_line(aes(group=L1))+ylab("Interacting Fraction")+xlab("Distance in microns")+ggtitle("CD8-SMA interacting fraction: spatial manual")+geom_label()
print(p)



p<-ggplot(IFmelt2, aes(x=Dist, y=value, col=SpatialManual, label=label))+geom_boxplot()+ylab("Interacting Fraction")+xlab("Distance in microns")+facet_grid(~knn)+ggtitle("CD8-SMA knn3 ")
print(p)


# ggplot(IFmelt2, aes(x=Dist, y=value, col=SpatialManual, linetype=SpatialManual))+geom_point()+stat_smooth()+ggtitle("CD8-EpCAM interacting fraction: spatial manual")+ylab("Interacting Fraction")

# ## Do a dot plot
 # p<-ggplot(IFmelt2, aes(x=Dist, y=value, col=SpatialManual, label=label))+geom_boxplot()+ylab("Interacting Fraction")+xlab("Distance in microns")+facet_grid(~knn)+ggtitle("CD8-SMA knn3 ")
 # print(p)

Using the boxplots as a guide, we can determine optimal “interacting distances” at which to perform downstream analysis. The best separation between restricted and infiltrating for EpCAM appears at:

  • 1-nn: 10-15 um
  • 3-nn: 15 um
  • 5-nn: 20 um

The interacting fraction does not distinguish SMA fractions (all restricted boxplots overlap with the infiltrating boxplots)


#The following plots use 3NN analysis with an interacting distance of 15um. We can firstly check if there is an association between different "interacting fraction" types and manual scoring, similar to what was performed for knn-analysis. Only CD8-EpCAM interacting distances is associated with manual scoring. All other metrics are not significant.


## reshape and do a correlation plot like for the knn analysis
par(mfrow=c(3,3))
for (i in 1:9){
  a1=wilcox.test(IFreshape[ ,i]~IFreshape$Infil)
  boxplot(IFreshape[ ,i]~IFreshape$Infil, main=paste(colnames(IFreshape)[i]," p=", round(a1$p.value,2), sep="" ), xlab="IF: knn3, 15um")
}

# Again, association between CD8 content and interacting fraction was observed ONLY in the growing samples or restricted cases. 
par(mfrow=c(1,2))
a1x=cor.test(IFreshape$CD8frac[IFreshape$Infil=="Infiltrating"], IFreshape$EpCAM[IFreshape$Infil=="Infiltrating"])
a1y=cor.test(IFreshape$CD8frac[IFreshape$Infil=="restricted"], IFreshape$EpCAM[IFreshape$Infil=="restricted"])
plot(IFreshape$CD8frac, IFreshape$EpCAM, col=IFreshape$Infil, pch=19, xlab="CD8 fraction", ylab="CD8-EpCAM IF (knn3-15um)", main="spatial scoring")
legend("topright", c(paste("infil p=", round(a1x$p.value,2)), paste("restrict p=", round(a1y$p.value,2))), lwd=2, col=c(1,2))

a1x=cor.test(IFreshape$CD8frac[IFreshape$Growth=="growing"], IFreshape$EpCAM[IFreshape$Growth=="growing"])
a1y=cor.test(IFreshape$CD8frac[IFreshape$Growth=="stable"], IFreshape$EpCAM[IFreshape$Growth=="stable"])

plot(IFreshape$CD8frac, IFreshape$EpCAM, col=IFreshape$Growth, pch=19, xlab="CD8 fraction", ylab="CD8-EpCAM IF (knn3-15um)", main="tumor growth")
legend("topright", c(paste("growing p=", round(a1x$p.value,2)), paste("stable p=", round(a1y$p.value,2))), lwd=2, col=c(1,2))

5.2.2 Growth

Here, we check if there is an association between the spatial pattern and tumor growth. P value by wilcox test shown


######
# compare these metrics with growth
####
#pdf("~/Desktop/4D-IF-summary-growth.pdf", height=5, width=7)

#pdf(sprintf("rslt/WSI-analysis/interacting_fraction_vs_treatment_growth_%s.pdf", Sys.Date()), height=7, width=12)
ctypes=unique(IFmelt$NearestNeighbor)
t2=IFmelt[which(IFmelt$Reference=="CD8" & IFmelt$knn=="knn3" & IFmelt$Dist==15) , ]

pval2=sapply(ctypes, function(x) wilcox.test(t2$value[t2$NearestNeighbor==x & t2$Growth=="growing" ], t2$value[t2$NearestNeighbor==x & t2$Growth=="stable" ])$p.value)

ann_text=data.frame(Glabel=round(pval2,3), NearestNeighbor=(ctypes ), Growth="stable", value=0.7)

p=ggplot(IFmelt[IFmelt$Reference=="CD8" & IFmelt$knn=="knn3" & IFmelt$Dist==15 , ], aes(x=Growth, y=value, col=Growth))+facet_grid(~NearestNeighbor)+geom_boxplot()+ylab("Interacting Fraction")+xlab("Distance in microns")+ggtitle("Interacting fraction, knn3, dist=15")+theme_bw()+geom_point(position=position_jitterdodge(),alpha=0.3)+scale_color_manual(values=c(ColSizeb, "black"))
p+geom_text(data=ann_text, mapping=aes(x=2, y=0.75, label=Glabel))
interacting fraction

Figure 5.2: interacting fraction


DT::datatable(IFmelt[IFmelt$Reference=="CD8" & IFmelt$knn=="knn3" & IFmelt$Dist==15 , c(2:4, 6, 8:9, 12)], rownames=F, class='cell-border stripe',
          extensions="Buttons", options=list(dom="Bfrtip", buttons=c('csv', 'excel'), scrollX=T))

Figure 5.2: interacting fraction

pval2
##         CD8       EpCAM  EpCAM: SMA         SMA     Unclass 
## 0.716774420 0.001914251 0.395488483 0.154356972 0.428393111

5.2.3 Treatment

Similarly, compare the distances with treatment:

#pdf("~/Desktop/4D-IF-summary-treatment.pdf", height=5, width=7)

ctypes=unique(IFmelt$NearestNeighbor)
t2=IFmelt[which(IFmelt$Reference=="CD8" & IFmelt$knn=="knn3" & IFmelt$Dist==15) , ]
TreatV=sort(unique(IFmelt$Treatment))

pval2=matrix(NA, nrow=3, ncol=5)
colnames(pval2)=ctypes
rownames(pval2)=TreatV[1:3]

for (i in 1:3){
pval2[i, ]=sapply(ctypes, function(x) wilcox.test(t2$value[t2$NearestNeighbor==x & t2$Treatment==TreatV[i]], t2$value[t2$NearestNeighbor==x & t2$Treatment=="Vehicle"])$p.value)
}

pmelt=melt(pval2)
colnames(pmelt)=c("Treatment", "NearestNeighbor", "label")
pmelt$label=round(pmelt$label, 2)
pmelt$value=0.8

p<-ggplot(IFmelt[IFmelt$Reference=="CD8" & IFmelt$knn=="knn3" & IFmelt$Dist==15 , ], aes(x=Treatment, y=value, col=Treatment))+facet_grid(~NearestNeighbor)+geom_boxplot()+ylab("Interacting Fraction")+xlab("Distance in microns")+ggtitle("Interacting fraction, knn3, dist=15")+geom_point(position=position_jitterdodge(),alpha=0.3)+scale_color_manual(values=c(ColMerge[,1]))
p+geom_text(data=pmelt, mapping=aes(x=Treatment, y=0.75, label=label, col=Treatment))


fit1=aov(value~Treatment, data=IFmelt[IFmelt$NearestNeighbor=="EpCAM" & IFmelt$Reference=="CD8", ])
fit2=aov(value~Treatment, data=IFmelt[IFmelt$NearestNeighbor=="SMA"& IFmelt$Reference=="CD8", ])
fit3=aov(value~Treatment, data=IFmelt[IFmelt$NearestNeighbor=="EpCAM: SMA"& IFmelt$Reference=="CD8", ])
fit4=aov(value~Treatment, data=IFmelt[IFmelt$NearestNeighbor=="Unclass"& IFmelt$Reference=="CD8", ])

There appears to be a difference in CD8-unclass interactions in LY treated samples (LY, PDL1+LY), but not in the other cases

5.3 M-H distances

The M-H distance (or Morisita Horn index) can be considered as a correlation coefficient in spatial distribution between cell type A and cell type B. To calculate this metric, the whole slide image is divided into grids of size 50 to 500um. Within each grid, the total number of cells A and B are determined.

The M-H index is thus determined as:

\[ \frac{2\sum_{i=1}^n a_ib_i}{(D_a+D_b)AB} \] where \(a_i\) and \(b_i\) are the number of cells in grid \(i\), \(A\) and \(B\) the total number of cells, and \(D_x\) is the Simpson’s index.

5.3.1 Comparison to Manual Scoring

Similar to the interacting fraction, we plot the MH index for increasing values of gridsize to determine an optimal metric to compare spatial patterns. Ideally, we would pick a metric has the following properties:

  • good separation of the different values
  • a reasonable number of cells within each grid (avoid too small grids which give counts of 0)
  • avoid plateauing of MH values because the grid size is too large
MHgridsize=150

MHmeltsumm=melt(WSIMHsetup, id.vars=c("gridsize", "Ntiles"))

MHmelt=melt(WSIMH, measure.vars="MH.mean")
MHmelt$SpatialManual=Cdata$Pathologist.CD8.Spatial[match(MHmelt$L1, gsub("_", "", Cdata$TumorID))]
MHmeltsumm$SpatialManual=Cdata$Pathologist.CD8.Spatial[match(MHmeltsumm$L1, gsub("_", "", Cdata$TumorID))]
MHmelt$Treatment=factor(Cdata$Treatment[match(MHmelt$L1, gsub("_", "", Cdata$TumorID))])
MHmelt$Growth=factor(Cdata$Tumor.Growth[match(MHmelt$L1, gsub("_", "", Cdata$TumorID))])
MHmelt$Growth=MHmelt$Growth
#MHmelt$Growth[grep("no data", MHmelt$Growth)]="no data"

A1=MHmelt[MHmelt$Var2=="CD8", ]
A1$label=A1$L1
A1$label[which(A1$gridsize!=500)]=NA

A2=MHmelt[MHmelt$Var2=="CD8" & MHmelt$gridsize==MHgridsize & MHmelt$Var1%in%c("EpCAM", "SMA"), ]
A3=MHmelt[MHmelt$Var2=="CD8" & MHmelt$gridsize==MHgridsize, ]
A3$CD8frac=WSIvalFracs[ 1, match(A3$L1, colnames(WSIvals))]

MHTempSumm=A3 #MHmelt[which(MHmelt$gridsize==300 & MHmelt$Var2=="CD8"), ]
MHreshape=acast(A3[ ,c("Var1", "value", "L1")], L1~Var1, value.var="value" )
MHreshape=data.frame(MHreshape)
MHreshape$EpMIN=ifelse(MHreshape$EpCAM..SMA<MHreshape$EpCAM, MHreshape$EpCAM..SMA, MHreshape$EpCAM)
MHreshape$EpMIN[which(is.na(MHreshape$EpMIN))]=MHreshape$EpCAM[which(is.na(MHreshape$EpMIN))]
MHreshape$EpStrRatio1=MHreshape$EpCAM/rowSums(MHreshape[ ,c("EpCAM..SMA", "SMA")], na.rm=T)

MHreshape$EpStrRatio2=MHreshape$EpCAM/(MHreshape$SMA)
MHreshape$EpStrRatio3=rowSums(MHreshape[ ,c("EpCAM..SMA", "EpCAM")], na.rm=T)/rowSums(MHreshape[ ,c("EpCAM..SMA", "SMA")], na.rm=T)
MHreshape$EpStrRatio4=rowSums(MHreshape[ ,c("EpCAM..SMA", "EpCAM")], na.rm=T)/(MHreshape$SMA)
MHreshape$Treatment=factor(A3$Treatment[match(rownames(MHreshape), A3$L1)])
MHreshape$Growth=factor(A3$Growth[match(rownames(MHreshape), A3$L1)])
MHreshape$Infil=factor(A3$SpatialManual[match(rownames(MHreshape), A3$L1)])

#pdf(sprintf("rslt/WSI-analysis/MHplots_compare_spatial_manual_%s.pdf", Sys.Date()), height=7, width=12)

p<-ggplot(A1, aes(x=gridsize, y=value, col=SpatialManual, label=label))+facet_grid(~Var1)+geom_line(aes(group=L1))+xlab("grid size")+ylab("Morisita Horn index")+ggtitle("MH index with CD8: spatial manual")+geom_label()
print(p)


ggplot(A1, aes(x=factor(gridsize), y=value, col=SpatialManual, label=label))+facet_grid(~Var1)+geom_boxplot()+xlab("grid size")+ylab("Morisita Horn index")+ggtitle("MH index with CD8: spatial manual")


ggplot(MHmeltsumm, aes(x=factor(gridsize), y=value, col=SpatialManual))+facet_wrap(~variable, scale="free_y")+geom_boxplot()+xlab("grid size")+ylab("Morisita Horn index")+ggtitle("expected number of cells in each grid size")+scale_y_log10()

With increasing grid size, optimal differences between infiltrating and restricted appear at the following sizes:

  • epCAML 100um+
  • epcam:SMA most significant at 350+
  • SMA: 150+
  • Unclass:200+

We probably want to use a metric/gridsize of 250 um as the expected/mean number of cells in each grid is 10 here. Other notes:

  • double positive cells (EpCAM+SMA+) appear in predominantly the restricted cases?
  • higher SMA- stromal cells in restricted

5.3.2 Growth

Below are the MH indices with increasing grid-size for individual samples. In general, there is a subset of stable samples which have very high intermixing


p<-ggplot(A1, aes(x=gridsize, y=value, col=Growth))+facet_grid(~Var1)+geom_line(aes(group=L1))+ylab("grid size")+ylab("Morisita Horn index")+ggtitle("MH index with CD8: growth")+geom_point(position=position_jitterdodge(),alpha=0.3)
print(p)
Fig4e MH index with CD8 growth

Figure 5.3: Fig4e MH index with CD8 growth

We can condense these values based on growing vs stable cases:

#pdf("figure-outputs/4E-MH-summary-growth.pdf", height=5, width=7)

pvals=sapply(unique(A3$Var1), function(x) wilcox.test(A3$value[A3$Var1==x & A3$Growth=="growing"], 
                                                      A3$value[A3$Var1==x & A3$Growth=="stable"])$p.value)

ann_text=data.frame(label=round(pvals,2), Var1=unique(A3$Var1), Growth="growing")

ggplot(A3, aes(x=Growth, y=value, col=Growth))+facet_grid(~Var1)+geom_boxplot()+geom_point()+ylab("grid size")+ylab("Morisita Horn index")+ggtitle("MH index with CD8: growth")+geom_text(data=ann_text, mapping = aes(x=2, y=0.75, label=label))+scale_color_manual(values=c(ColSizeb, "black"))

#dev.off()

DT::datatable(A3, rownames=F, class='cell-border stripe',
          extensions="Buttons", options=list(dom="Bfrtip", buttons=c('csv', 'excel'), scrollX=T))

#write.csv(A3, file="nature-tables/Fig4e-Ext4h.csv")

Although the MH values for growing vs stable are not different, we can compare the mixing in epcam vs stroma in matched samples:

#pdf("~/Desktop/4E-MH-summary-matched-samples-growing-stable.pdf", height=5, width=7)


A2t=A2[-which(A2$L1%in%c("15RD", "5LB")), ]

 a1=t.test(A2t$value[A2t$Growth=="stable" & A2t$Var1=="EpCAM"], A2t$value[A2t$Growth=="stable" & A2t$Var1=="SMA"], paired=T)
 a2=t.test(A2t$value[A2t$Growth=="growing" & A2t$Var1=="EpCAM"], A2t$value[A2t$Growth=="growing" & A2t$Var1=="SMA"], paired=T)
# 
ggplot(A2, aes(x=Var1, y=value, col=Growth))+facet_grid(~Growth)+geom_line(aes(group=L1))+ylab("grid size")+ylab(sprintf("Morisita Horn index @ %s", MHgridsize))+ggtitle(sprintf("MH index with CD8: stable p=%s, growing p=%s", round(a1$p.value, 2), round(a2$p.value, 2)))+geom_errorbar(aes(ymin=MH.lower, ymax=MH.upper), width=.2)+geom_point()

In the growing cases, there is a difference in the MH index in epacam and stroma, but the stable cases do not support this.

5.3.3 Treatment


#pdf("~/Desktop/4E-MH-summary-treatment.pdf", height=5, width=7)


pval2=matrix(NA, nrow=3, ncol=4)
colnames(pval2)=unique(A3$Var1)
rownames(pval2)=TreatV[1:3]

for (i in 1:3){
pval2[i, ]=sapply(unique(A3$Var1), function(x) wilcox.test(A3$value[A3$Var1==x & A3$Treatment==TreatV[i]], A3$value[A3$Var1==x & A3$Treatment=="Vehicle"])$p.value)
}

pmelt=melt(pval2)
colnames(pmelt)=c("Treatment", "Var1", "label")
pmelt$label=round(pmelt$label, 2)
pmelt$value=0.8

p<-ggplot(A1, aes(x=gridsize, y=value, col=Treatment))+facet_grid(~Var1)+geom_line(aes(group=L1))+ylab("grid size")+ylab("Morisita Horn index")+ggtitle("MH index with CD8: Treatment")+geom_point(position=position_jitterdodge(),alpha=0.3)
print(p)

ggplot(A3, aes(x=Treatment, y=value, col=Treatment))+facet_grid(~Var1)+geom_boxplot()+ylab("grid size")+ylab("Morisita Horn index")+ggtitle("MH index with CD8: Treatment")+geom_text(data=pmelt, mapping=aes(x=Treatment, y=0.8, label=label))+geom_point(position=position_jitterdodge(),alpha=0.3)+theme_bw()+scale_color_manual(values=ColMerge[ ,1])
MH values with Treatment

Figure 5.4: MH values with Treatment



#dev.off()

There is no difference between the different spatial metrics compared to the vehicle, however, we can compare for a given treatment if there is a difference between the epcam and the stromal interaction scores. It appears that there is a difference only in the control, where SMA mixing is higher than EPcam mixing:

## pairwise comparison: Epcam vs stroma
## calculate the pairwise p values here
A2t=A2[-which(A2$L1%in%c("15RD", "5LB")), ]

a1=t.test(A2t$value[A2t$Treatment=="Vehicle" & A2t$Var1=="EpCAM"], A2t$value[A2t$Treatment=="Vehicle"& A2t$Var1=="SMA"], paired=T)
a2=t.test(A2t$value[A2t$Treatment=="PDL1" & A2t$Var1=="EpCAM"], A2t$value[A2t$Treatment=="PDL1" & A2t$Var1=="SMA"], paired=T)
a3=t.test(A2t$value[A2t$Treatment=="PDL1+LY" & A2t$Var1=="EpCAM"], A2t$value[A2t$Treatment=="PDL1+LY" & A2t$Var1=="SMA"], paired=T)
a4=t.test(A2t$value[A2t$Treatment=="LY" & A2t$Var1=="EpCAM"], A2t$value[A2t$Treatment=="LY" & A2t$Var1=="SMA"], paired=T)

p<-ggplot(A2, aes(x=Var1, y=value, col=Treatment))+facet_grid(~Treatment)+geom_line(aes(group=L1))+ylab("grid size")+ylab(sprintf("Morisita Horn index @ %s", MHgridsize))+ggtitle(sprintf("MH index with CD8: Cntl p=%s, PDL1 p=%s LY p=%s P+L p=%s", round(a1$p.value, 2), round(a2$p.value, 2), round(a3$p.value,2), round(a4$p.value, 2)))+geom_errorbar(aes(ymin=MH.lower, ymax=MH.upper), width=.2)+geom_point()
print(p)

5.4 Comparison between metrics

After assessing optimal parameters for each metric, in this section we assess which metric could be the best for spatial analysis.

Below is a table of the different metrics and their values for each sample

#write.table(SummaryData, file="../metadata/WSI_compared_other_metrics.csv")

##
# Combine the data from above into one file
#
df.temp=cbind(knnreshape[ , 1:9], IFreshape[ ,1:10], MHreshape[ , 1:9] )
colnames(df.temp)=paste(rep(c("knn", "IF", "MH"), times=c(9, 10, 9)), colnames(df.temp),sep=".")
#colnames(df.Spatial)=paste()
df.Spatial=cbind(df.Spatial, df.temp)
#df.Spatial=cbind(df.Spatial,IFreshape[ ,11:14])
df.Spatial$GrowthRate=Cdata$GrowthRate[match(rownames(df.Spatial), gsub("_", "", Cdata$TumorID))]
df.Spatial$TumSize=Cdata$Tumor.diameter.sac.mm[match(rownames(df.Spatial), gsub("_", "", Cdata$TumorID))]
df.Spatial$knn.EpCAMcut=cut(df.Spatial$knn.EpCAM, c(-1, median(df.Spatial$knn.EpCAM, na.rm = T), 1.2), c("low", "high"))
df.Spatial$MH.EpCAMcut=cut(df.Spatial$MH.EpCAM, c(-1, median(df.Spatial$MH.EpCAM, na.rm = T), 1.2), c("low", "high"))
df.Spatial$IF.EpCAMcut=cut(df.Spatial$IF.EpCAM, c(-1, median(df.Spatial$IF.EpCAM, na.rm = T), 1.2), c("low", "high"))
df.Spatial$CD8Fraccut=cut(df.Spatial$CD8frac, c(-1, median(df.Spatial$CD8frac, na.rm = T), 1.2), c("low", "high"))

#t2=WSIvalFracs[, match(rownames(df.Spatial), colnames(WSIvalFracs))]
#rownames(t2)=paste(rownames(t2), "Frac.WSI", sep="")
#t3=WSIvals[, match(rownames(df.Spatial), colnames(WSIvalFracs))]
#rownames(t2)=paste(rownames(t2), ".WSI", sep="")

#df.Spatial=cbind(df.Spatial, t(t2), t(t3))

scroll_box(kable(df.Spatial, format="html"),
         height="300px", width="100%")
CD8 EpCAM EpCAM:SMA SMA Unclass CD8frac EpCAMfrac EpCAM:SMAfrac SMAfrac Unclassfrac knn.EpCAM knn.EpCAM..SMA knn.SMA knn.Unclass knn.EpMIN knn.EpStrRatio1 knn.EpStrRatio2 knn.EpStrRatio3 knn.EpStrRatio4 IF.CD8 IF.EpCAM IF.EpCAM..SMA IF.SMA IF.Unclass IF.EpMIN IF.EpStrRatio1 IF.EpStrRatio2 IF.EpStrRatio3 IF.EpStrRatio4 MH.EpCAM MH.EpCAM..SMA MH.SMA MH.Unclass MH.EpMIN MH.EpStrRatio1 MH.EpStrRatio2 MH.EpStrRatio3 MH.EpStrRatio4 GrowthRate TumSize knn.EpCAMcut MH.EpCAMcut IF.EpCAMcut CD8Fraccut
10LB 65507 761887 343389 1126173 563894 0.0228977 0.2663149 0.1200304 0.3936498 0.1971072 39.96193 52.72253 16.94935 28.07623 39.96193 0.5735732 2.3577258 1.3302993 5.4683183 0.2053826 0.2236555 0.0803884 0.5905018 0.3010823 0.0803884 3.333713e-01 3.787550e-01 4.531947e-01 5.148906e-01 0.3580143 0.3510808 0.4990200 0.4513703 0.3510808 0.4211433 0.7174347 0.8341306 1.4209752 6.2285714 35 NA high low low
10LC 3481 102702 58017 106919 75799 0.0100341 0.2960411 0.1672355 0.3081967 0.2184926 32.12258 34.57734 17.87345 24.28626 32.12258 0.6124326 1.7972227 1.2716665 3.7317865 0.0784257 0.3168630 0.2375754 0.4768745 0.3714450 0.2375754 4.435062e-01 6.644578e-01 7.760354e-01 1.162651e+00 0.3338239 0.3690715 0.3937566 0.4370583 0.3338239 0.4376135 0.8477925 0.9214336 1.7851014 5.3571429 25 high low high low
10LD 67342 192699 204061 402092 122548 0.0681088 0.1948931 0.2063845 0.4066703 0.1239434 34.33110 30.18133 15.97086 35.99495 30.18133 0.7438671 2.1496080 1.3978193 4.0393825 0.2816222 0.2944373 0.2563779 0.5786730 0.1450209 0.2563779 3.525981e-01 5.088147e-01 6.596187e-01 9.518592e-01 0.6191886 0.5596224 0.6542665 0.5383814 0.5596224 0.5100867 0.9463859 0.9711029 1.8017290 1.9729730 10 high high high high
10NA 27239 324933 0 167028 65796 0.0465627 0.5554448 0.0000000 0.2855199 0.1124726 23.80977 NA 27.49276 48.19205 23.80977 0.8660376 0.8660376 0.8660376 0.8660376 0.3591542 0.4808547 NA 0.3207166 0.1112743 0.4808547 1.499313e+00 1.499313e+00 1.499313e+00 1.499313e+00 0.3849200 NA 0.4254207 0.3382134 0.3849200 0.9047983 0.9047983 0.9047983 0.9047983 NA 12 high high high high
10ND 22618 463070 0 282453 227392 0.0227195 0.4651478 0.0000000 0.2837204 0.2284123 26.20801 NA 30.06210 40.01762 26.20801 0.8717958 0.8717958 0.8717958 0.8717958 0.3008666 0.4408878 NA 0.1738438 0.1426298 0.4408878 2.536114e+00 2.536114e+00 2.536114e+00 2.536114e+00 0.3107974 NA 0.2528283 0.2367529 0.3107974 1.2292825 1.2292825 1.2292825 1.2292825 0.3930533 15 high low high low
10RBL 3393 58223 25198 42642 37922 0.0202715 0.3478534 0.1505455 0.2547647 0.2265650 44.92703 67.36295 18.22774 26.07893 44.92703 0.5249056 2.4647619 1.3119416 6.1603904 0.1279104 0.2213380 0.2039493 0.5758915 0.4544651 0.2039493 2.838246e-01 3.843398e-01 5.453515e-01 7.384852e-01 0.4376521 0.4278403 0.5676946 0.5101371 0.4278403 0.4396151 0.7709288 0.8693742 1.5245739 4.9761905 40 NA high low low
10RBU 178767 437050 895393 763776 390628 0.0670641 0.1639585 0.3359050 0.2865291 0.1465433 87.12901 76.12121 20.78384 35.23226 76.12121 0.8991173 4.1921519 1.6846410 7.8546713 0.4317687 0.0816202 0.3119480 0.4723411 0.2877489 0.0816202 1.040690e-01 1.727993e-01 5.018152e-01 8.332287e-01 0.3034275 0.3661208 0.5282356 0.5051726 0.3034275 0.3392691 0.5744170 0.7486370 1.2675183 NA 12 NA low low high
10RC 1823 15806 1209 5729 19118 0.0417306 0.3618176 0.0276754 0.1311434 0.4376331 25.68401 94.64722 31.71923 16.94124 25.68401 0.2032503 0.8097300 0.9522403 3.7936364 0.1695008 0.4119583 0.0142622 0.1579813 0.5798135 0.0142622 2.391720e+00 2.607639e+00 2.474522e+00 2.697917e+00 0.5653879 0.3556849 0.5385849 0.6096737 0.3556849 0.6322341 1.0497655 1.0299719 1.7101719 NA 4 high high high high
11LB 196293 587504 13 47 506036 0.1521777 0.4554672 0.0000101 0.0000364 0.3923085 69.10296 7919.73815 6967.17455 17.47761 69.10296 0.0046419 0.0099184 0.5366352 1.1466400 0.6929437 0.2080105 0.0000000 0.0000102 0.5774735 0.0000000 2.041550e+04 2.041550e+04 2.041550e+04 2.041550e+04 0.2377147 0.0002624 0.0002198 0.5655570 0.0002624 493.0221183 1081.6128754 493.5662971 1082.8067179 3.7857143 25 NA low low high
11ND 42097 46872 2054 1551 18188 0.3800672 0.4231776 0.0185443 0.0140030 0.1642079 26.89773 115.53783 92.98237 27.03077 26.89773 0.1289934 0.2892778 0.6830780 1.5318557 0.8951232 0.4326199 0.0146566 0.0079816 0.2765993 0.0146566 1.911018e+01 5.420238e+01 1.975761e+01 5.603869e+01 0.6777854 0.3947415 0.4434028 0.6868704 0.3947415 0.8086739 1.5285999 1.2796448 2.4188549 0.1065574 3 high high high high
11RC 40084 247458 137553 165687 204242 0.0504186 0.3112585 0.1730174 0.2084050 0.2569004 153.33513 178.57028 70.26468 25.53931 153.33513 0.6162122 2.1822504 1.3338375 4.7236450 0.7894172 0.0386688 0.0222283 0.0532631 0.4286498 0.0222283 5.122274e-01 7.259953e-01 8.066755e-01 1.143325e+00 0.0261783 0.0269403 0.0417948 0.1950371 0.0261783 0.3808571 0.6263514 0.7728005 1.2709349 2.6785714 19 NA low low high
11RD 29817 352081 198409 375106 373020 0.0224452 0.2650348 0.1493557 0.2823673 0.2807970 73.82461 68.89859 24.26948 26.26985 68.89859 0.7923810 3.0418697 1.5318895 5.8807677 0.2660563 0.2323507 0.1251300 0.3447698 0.4433712 0.1251300 4.944686e-01 6.739300e-01 7.607594e-01 1.036868e+00 0.2646073 0.2511598 0.3031015 0.4116961 0.2511598 0.4774054 0.8729991 0.9305487 1.7016318 3.6428571 25 NA low low low
12LC 2692 14614 289 125 7425 0.1070591 0.5811891 0.0114933 0.0049712 0.2952873 23.96795 185.03912 202.99400 19.41965 23.96795 0.0617678 0.1180722 0.5386320 1.0296219 0.5118871 0.4342496 0.0115156 0.0018574 0.5401189 0.0115156 3.247222e+01 2.338000e+02 3.333333e+01 2.400000e+02 0.4673802 0.3063251 0.2124277 0.6696129 0.3063251 0.9009690 2.2001850 1.4914721 3.6422059 NA 4 high high high high
12LD 3560 117731 1764 316 35776 0.0223693 0.7397626 0.0110841 0.0019856 0.2247985 12.74758 285.85343 355.94915 26.97636 12.74758 0.0198622 0.0358129 0.4652537 0.8388867 0.1078652 0.8362360 0.0230337 0.0002809 0.3896067 0.0230337 3.586747e+01 2.977000e+03 3.685542e+01 3.059000e+03 0.6771043 0.1575680 0.0985770 0.6385153 0.1575680 2.6434411 6.8687850 3.2585927 8.4672106 0.7702703 6 high high high low
13NA 121602 1156717 0 1221416 917869 0.0355811 0.3384585 0.0000000 0.3573896 0.2685709 70.80101 NA 22.21206 41.36664 70.80101 3.1875036 3.1875036 3.1875036 3.1875036 0.4678459 0.1279338 NA 0.4849838 0.2605385 0.1279338 2.637897e-01 2.637897e-01 2.637897e-01 2.637897e-01 0.2171042 NA 0.3290129 0.2449840 0.2171042 0.6598653 0.6598653 0.6598653 0.6598653 6.2000000 35 NA low low high
14NB 97911 340247 59025 167505 257678 0.1061520 0.3688850 0.0639930 0.1816036 0.2793663 49.22243 100.58773 30.48163 18.51350 49.22243 0.3755449 1.6148228 1.1429838 4.9147687 0.5053671 0.2540062 0.0380039 0.2488995 0.5430442 0.0380039 8.853369e-01 1.020517e+00 1.017799e+00 1.173205e+00 0.3688258 0.2567365 0.4412187 0.6186197 0.2567365 0.5284376 0.8359251 0.8962785 1.4178054 3.3150685 23 NA high low high
14NC 11415 115152 26633 17181 40501 0.0541298 0.5460494 0.1262934 0.0814721 0.1920553 45.49485 76.76388 44.46686 19.26432 45.49485 0.3752749 1.0231182 1.0084796 2.7494347 0.4206746 0.3935173 0.1186159 0.1239597 0.4955760 0.1186159 1.622246e+00 3.174558e+00 2.111232e+00 4.131449e+00 0.2641293 0.2184936 0.3293816 0.5586074 0.2184936 0.4820975 0.8018944 0.8808992 1.4652392 0.5595238 6 NA low high high
14RD 2194 107760 28128 18909 55255 0.0103371 0.5077127 0.1325255 0.0890900 0.2603347 22.18326 43.27617 31.50346 22.40920 22.18326 0.2966485 0.7041532 0.8753645 2.0778489 0.1216955 0.5000000 0.1927985 0.1681860 0.4799453 0.1927985 1.385101e+00 2.972900e+00 1.919192e+00 4.119241e+00 0.4488902 0.4014737 0.4661559 0.4728975 0.4014737 0.5173754 0.9629616 0.9801002 1.8242052 -0.1666667 1 high high high low
15LB 6350 105525 20305 130687 227056 0.0129612 0.2153910 0.0414453 0.2667501 0.4634524 32.69579 70.46882 20.89178 15.83206 32.69579 0.3578762 1.5650073 1.1292024 4.9380478 0.1064567 0.2226772 0.0332283 0.4615748 0.6502362 0.0332283 4.500318e-01 4.824292e-01 5.171865e-01 5.544183e-01 0.2963970 0.2630816 0.3507861 0.3456658 0.2630816 0.4828352 0.8449507 0.9113993 1.5949281 3.5000000 10 high low low low
15NC-D 3993 83111 0 99191 183857 0.0107875 0.2245321 0.0000000 0.2679737 0.4967068 39.22818 NA 23.06369 22.18943 39.22818 1.7008635 1.7008635 1.7008635 1.7008635 0.1344853 0.1299775 NA 0.3683947 0.3005259 0.1299775 3.528212e-01 3.528212e-01 3.528212e-01 3.528212e-01 0.3209594 NA 0.4006575 0.3074837 0.3209594 0.8010817 0.8010817 0.8010817 0.8010817 2.6857143 22 high low low low
15ND 1678 64379 8785 16699 68695 0.0104721 0.4017761 0.0548254 0.1042150 0.4287114 15.54429 53.43276 37.28968 17.33598 15.54429 0.1713390 0.4168524 0.7603086 1.8497622 0.0286055 0.6025030 0.0643623 0.1877235 0.4928486 0.0643623 2.390071e+00 3.209524e+00 2.645390e+00 3.552381e+00 0.4673145 0.5587742 0.5287410 0.4233763 0.4673145 0.4297085 0.8838250 0.9435167 1.9406265 -0.2285714 3 high high high low
15RD 24041 109825 0 0 598394 0.0328312 0.1499809 0.0000000 0.0000000 0.8171879 91.07517 NA NA 11.52589 91.07517 Inf NA Inf NA 0.2216630 0.1578969 NA NA 0.8819101 0.1578969 Inf NA Inf NA 0.3102088 NA NA 0.4379530 0.3102088 Inf NA Inf NA 3.5000000 17 NA low low low
16LA 12242 303229 0 267100 131130 0.0171528 0.4248684 0.0000000 0.3742464 0.1837324 31.26867 NA 19.19006 27.79750 31.26867 1.6294199 1.6294199 1.6294199 1.6294199 0.3536187 0.2750368 NA 0.4480477 0.3018298 0.2750368 6.138560e-01 6.138560e-01 6.138560e-01 6.138560e-01 0.1931754 NA 0.2149619 0.3460849 0.1931754 0.8986496 0.8986496 0.8986496 0.8986496 NA 5 high low high low
16LC 22907 229322 0 115205 112871 0.0476926 0.4774508 0.0000000 0.2398580 0.2349986 71.64542 NA 65.51353 21.56242 71.64542 1.0935973 1.0935973 1.0935973 1.0935973 0.6542978 0.2411490 NA 0.1293491 0.3635133 0.2411490 1.864327e+00 1.864327e+00 1.864327e+00 1.864327e+00 0.0574200 NA 0.0523851 0.1592857 0.0574200 1.0961129 1.0961129 1.0961129 1.0961129 0.6475410 7 NA low low high
16LD 69599 779658 0 905754 446321 0.0316168 0.3541756 0.0000000 0.4114572 0.2027504 38.85862 NA 19.42446 22.61613 38.85862 2.0004993 2.0004993 2.0004993 2.0004993 0.2420006 0.3001624 NA 0.4920473 0.3806520 0.3001624 6.100274e-01 6.100274e-01 6.100274e-01 6.100274e-01 0.4547154 NA 0.4828520 0.6091481 0.4547154 0.9417283 0.9417283 0.9417283 0.9417283 1.9151786 24 high high high low
16ND 137787 687621 242704 572319 188661 0.0753308 0.3759357 0.1326910 0.3128979 0.1031446 29.84045 40.34701 26.33421 47.06903 29.84045 0.4475090 1.1331438 1.0525821 2.6652576 0.4492659 0.2842939 0.0509409 0.3098188 0.0600419 0.0509409 7.880422e-01 9.176134e-01 9.292468e-01 1.082035e+00 0.4245758 0.4176264 0.4836524 0.3800594 0.4176264 0.4710815 0.8778531 0.9344524 1.7413377 1.0745763 25 high high high high
16RD 22697 179849 0 372829 165181 0.0306486 0.2428567 0.0000000 0.5034447 0.2230500 56.31604 NA 16.51236 34.65626 56.31604 3.4105390 3.4105390 3.4105390 3.4105390 0.2959863 0.2354937 NA 0.6114905 0.2825043 0.2354937 3.851142e-01 3.851142e-01 3.851142e-01 3.851142e-01 0.3006802 NA 0.4041937 0.3873276 0.3006802 0.7439011 0.7439011 0.7439011 0.7439011 -0.1027397 3 NA low low low
17NA 114939 444216 0 1252644 619810 0.0472687 0.1826840 0.0000000 0.5151503 0.2548971 61.86642 NA 22.48160 21.57101 61.86642 2.7518686 2.7518686 2.7518686 2.7518686 0.3757123 0.0907525 NA 0.3861962 0.5524496 0.0907525 2.349907e-01 2.349907e-01 2.349907e-01 2.349907e-01 0.2549486 NA 0.2843224 0.5770821 0.2549486 0.8966884 0.8966884 0.8966884 0.8966884 6.8000000 28 NA low low high
17ND 13298 62937 203 1332 41466 0.1115267 0.5278356 0.0017025 0.0111711 0.3477641 15.06388 232.02438 111.85371 18.01790 15.06388 0.0438059 0.1346748 0.7185345 2.2090306 0.3246353 0.6664160 0.0003008 0.0029328 0.4851857 0.0003008 2.060930e+02 2.272308e+02 2.061860e+02 2.273333e+02 0.7675142 0.1896339 0.4520953 0.7379279 0.1896339 1.1960095 1.6976822 1.4915140 2.1171378 -0.3846154 4 high high high high
17RB 85774 1378423 250877 112621 258890 0.0411074 0.6606120 0.1202333 0.0539738 0.1240735 16.70828 88.48121 95.13230 35.71473 16.70828 0.0909970 0.1756320 0.5728853 1.1057178 0.3890573 0.6275561 0.0788001 0.0414694 0.2133747 0.0788001 5.217914e+00 1.513298e+01 5.873110e+00 1.703317e+01 0.3863508 0.2759967 0.2356124 0.4572544 0.2759967 0.7551678 1.6397723 1.2946357 2.8111736 8.2142857 35 high high high high
2N 109694 1143016 335772 714690 1340651 0.0301041 0.3136859 0.0921483 0.1961374 0.3679243 47.77260 87.60001 35.15413 19.20262 47.77260 0.3891730 1.3589470 1.1027947 3.8508309 0.3846518 0.2826408 0.0808522 0.2448721 0.5902966 0.0808522 8.677302e-01 1.154238e+00 1.115953e+00 1.484420e+00 0.3491878 0.2388853 0.3133452 0.4562766 0.2388853 0.6323226 1.1143872 1.0649053 1.8767583 11.7000000 47 NA low high low
2RA 24008 120410 473 10966 123894 0.0858192 0.4304185 0.0016908 0.0391991 0.4428724 44.80857 534.05443 132.80795 15.31048 44.80857 0.0671931 0.3373938 0.8680397 4.3586471 0.3464678 0.4371043 0.0009580 0.0452766 0.6856881 0.0009580 9.454054e+00 9.654094e+00 9.474775e+00 9.675253e+00 0.5644597 0.0274858 0.2713765 0.7331093 0.0274858 1.8886952 2.0799877 1.9806633 2.1812706 3.5428571 20 NA high high high
2RD 31061 111306 51141 76195 148799 0.0742195 0.2659629 0.1222001 0.1820660 0.3555515 38.61221 73.15221 27.66634 21.62384 38.61221 0.3829872 1.3956389 1.1085701 4.0397263 0.3219793 0.1979009 0.1740124 0.2492193 0.5216188 0.1740124 4.675947e-01 7.940835e-01 8.787464e-01 1.492314e+00 0.5678202 0.4565483 0.5544826 0.5877279 0.4565483 0.5616249 1.0240540 1.0131920 1.8474310 20.0000000 20 high high low high
3LA 83808 623759 232788 157121 480097 0.0531246 0.3953915 0.1475608 0.0995967 0.3043263 46.09601 104.05573 65.98973 41.88896 46.09601 0.2710805 0.6985331 0.8830094 2.2753805 0.4243628 0.2865240 0.0929863 0.0477520 0.2387958 0.0929863 2.035863e+00 6.000250e+00 2.696566e+00 7.947526e+00 0.3332982 0.3041986 0.2997380 0.3192354 0.3041986 0.5518761 1.1119649 1.0555690 2.1268463 3.0000000 12 NA low high high
3NB 98748 168767 0 15223 73972 0.2768299 0.4731210 0.0000000 0.0426761 0.2073729 32.31159 NA 62.64161 29.70786 32.31159 0.5158168 0.5158168 0.5158168 0.5158168 0.7233868 0.4377911 NA 0.0275246 0.2218779 0.4377911 1.590545e+01 1.590545e+01 1.590545e+01 1.590545e+01 0.6371794 NA 0.4947252 0.6142263 0.6371794 1.2879462 1.2879462 1.2879462 1.2879462 0.0245902 6 high high high high
3RB 43810 717597 0 625176 256989 0.0266554 0.4366082 0.0000000 0.3803764 0.1563600 42.63162 NA 29.99254 46.16097 42.63162 1.4214077 1.4214077 1.4214077 1.4214077 0.1779959 0.2910751 NA 0.3592331 0.2055695 0.2910751 8.102681e-01 8.102681e-01 8.102681e-01 8.102681e-01 0.3543015 NA 0.4120032 0.4057485 0.3543015 0.8599486 0.8599486 0.8599486 0.8599486 7.6857143 35 NA high high low
3RC 11691 195683 58422 76220 161604 0.0232139 0.3885529 0.1160041 0.1513443 0.3208848 32.11938 55.92834 37.32758 27.31100 32.11938 0.3444219 0.8604733 0.9441516 2.3587846 0.2039175 0.2962108 0.0879309 0.2004961 0.3165683 0.0879309 1.026987e+00 1.477389e+00 1.331851e+00 1.915956e+00 0.3811222 0.3788413 0.4385962 0.3814209 0.3788413 0.4662403 0.8689593 0.9296901 1.7327180 5.5428571 32 high high high low
4LB 37888 542224 163326 231943 574396 0.0244474 0.3498723 0.1053868 0.1496622 0.3706314 30.99117 57.04930 28.06937 16.66993 30.99117 0.3640937 1.1040920 1.0343262 3.1365315 0.2172720 0.2903294 0.1223870 0.2381757 0.5714738 0.1223870 8.052119e-01 1.218972e+00 1.144645e+00 1.732824e+00 0.4265239 0.4040558 0.5049626 0.5312531 0.4040558 0.4692137 0.8446643 0.9137105 1.6448340 6.8783784 35 high high high low
4NC 4456 396164 0 96651 50231 0.0081388 0.7235846 0.0000000 0.1765309 0.0917458 15.33106 NA 30.68159 155.44956 15.33106 0.4996826 0.4996826 0.4996826 0.4996826 0.2675045 0.6851436 NA 0.2188061 0.0832585 0.6851436 3.131282e+00 3.131282e+00 3.131282e+00 3.131282e+00 0.0848612 NA 0.0807651 0.0586587 0.0848612 1.0507163 1.0507163 1.0507163 1.0507163 3.0000000 15 high low high low
4ND 4010 198640 0 265301 57984 0.0076245 0.3776893 0.0000000 0.5044369 0.1102494 20.75556 NA 14.30657 39.67271 20.75556 1.4507707 1.4507707 1.4507707 1.4507707 0.0356608 0.4768080 NA 0.7169576 0.1271820 0.4768080 6.650435e-01 6.650435e-01 6.650435e-01 6.650435e-01 0.4041773 NA 0.4329954 0.3409348 0.4041773 0.9334448 0.9334448 0.9334448 0.9334448 4.3000000 20 high high high low
4RB 69477 752029 407898 556391 694672 0.0280096 0.3031804 0.1644440 0.2243090 0.2800569 53.31478 62.79761 25.09420 28.66758 53.31478 0.6065954 2.1245853 1.3210831 4.6270602 0.3672294 0.1765476 0.1182837 0.3346431 0.3210703 0.1182837 3.897928e-01 5.275699e-01 6.509470e-01 8.810323e-01 0.2509451 0.2272416 0.3686740 0.3511422 0.2272416 0.4211084 0.6806694 0.8024403 1.2970449 7.8108108 40 NA low low low
5LA 30482 234195 0 320467 86337 0.0453952 0.3487738 0.0000000 0.4772540 0.1285770 41.12871 NA 15.84267 35.17202 41.12871 2.5960728 2.5960728 2.5960728 2.5960728 0.3901975 0.2471295 NA 0.5824421 0.1998228 0.2471295 4.242987e-01 4.242987e-01 4.242987e-01 4.242987e-01 0.2946212 NA 0.4144137 0.4699263 0.2946212 0.7109351 0.7109351 0.7109351 0.7109351 0.5000000 6 NA low low high
5LB 7634 139335 0 0 149118 0.0257830 0.4705880 0.0000000 0.0000000 0.5036290 41.91177 NA NA 15.05867 41.91177 Inf NA Inf NA 0.3379618 0.2807178 NA NA 0.6502489 0.2807178 Inf NA Inf NA 0.3399551 NA NA 0.4551737 0.3399551 Inf NA Inf NA 4.0000000 4 NA low high low
5LC 93441 516380 0 833223 371152 0.0515055 0.2846330 0.0000000 0.4592795 0.2045821 82.68448 NA 23.56636 61.02936 82.68448 3.5085804 3.5085804 3.5085804 3.5085804 0.5297032 0.1168545 NA 0.4516540 0.1066662 0.1168545 2.587257e-01 2.587257e-01 2.587257e-01 2.587257e-01 0.1430094 NA 0.2947895 0.1798944 0.1430094 0.4851237 0.4851237 0.4851237 0.4851237 3.5000000 22 NA low low high
5LD 25942 176986 7127 54197 480977 0.0348108 0.2374921 0.0095635 0.0727253 0.6454083 91.41539 174.12079 36.47074 28.68030 91.41539 0.4340886 2.5065407 1.2609063 7.2808006 0.4943721 0.0735101 0.0027369 0.1400046 0.2530645 0.0027369 5.149878e-01 5.250551e-01 5.341615e-01 5.446035e-01 0.1329943 0.1850916 0.3850802 0.1780933 0.1329943 0.2332531 0.3453679 0.5578774 0.8260252 0.9299769 15 NA low low low
5RB 187348 1078667 0 1452387 421479 0.0596672 0.3435375 0.0000000 0.4625612 0.1342341 39.00602 NA 20.88804 28.66510 39.00602 1.8673849 1.8673849 1.8673849 1.8673849 0.4820815 0.1934315 NA 0.5213079 0.2945054 0.1934315 3.710503e-01 3.710503e-01 3.710503e-01 3.710503e-01 0.3599100 NA 0.4620250 0.6079956 0.3599100 0.7789838 0.7789838 0.7789838 0.7789838 10.3571429 35 high high low high
6LDU 78416 377308 0 912277 41718 0.0556253 0.2676477 0.0000000 0.6471339 0.0295931 111.44785 NA 21.09500 421.71225 111.44785 5.2831411 5.2831411 5.2831411 5.2831411 0.4110641 0.1092889 NA 0.6222455 0.0145634 0.1092889 1.756363e-01 1.756363e-01 1.756363e-01 1.756363e-01 0.2047156 NA 0.4431975 0.1223735 0.2047156 0.4619061 0.4619061 0.4619061 0.4619061 2.1974522 32 NA low low high
6ND 170266 980499 223461 528928 183692 0.0815901 0.4698473 0.1070807 0.2534581 0.0880237 28.41556 47.86891 30.70313 51.92693 28.41556 0.3616497 0.9254938 0.9708856 2.4845825 0.5991449 0.2756628 0.0305404 0.2129080 0.0798398 0.0305404 1.132325e+00 1.294750e+00 1.257774e+00 1.438195e+00 0.3247328 0.3222135 0.3681602 0.2951982 0.3222135 0.4703725 0.8820423 0.9370959 1.7572416 30.0000000 40 high low high high
6RB 79608 90700 0 247319 94386 0.1554804 0.1771439 0.0000000 0.4830327 0.1843430 76.37955 NA 15.75336 43.30215 76.37955 4.8484602 4.8484602 4.8484602 4.8484602 0.5258014 0.1446840 NA 0.6712516 0.2653502 0.1446840 2.155435e-01 2.155435e-01 2.155435e-01 2.155435e-01 0.5029649 NA 0.7162637 0.5539084 0.5029649 0.7022064 0.7022064 0.7022064 0.7022064 2.8000000 18 NA high low high
6RC 157455 495313 238235 782142 324989 0.0788010 0.2478878 0.1192287 0.3914362 0.1626462 50.21453 66.49674 18.16188 34.81308 50.21453 0.5931413 2.7648305 1.3786105 6.4261656 0.3830618 0.1966022 0.0674351 0.5424852 0.2411991 0.0674351 3.223408e-01 3.624103e-01 4.329047e-01 4.867181e-01 0.4713741 0.4862458 0.6401989 0.5327762 0.4713741 0.4184618 0.7362931 0.8501259 1.4958161 8.2142857 40 NA high low high
7NB 32396 417604 0 543360 573677 0.0206734 0.2664928 0.0000000 0.3467436 0.3660903 80.17580 NA 23.02340 30.71527 80.17580 3.4823613 3.4823613 3.4823613 3.4823613 0.3887826 0.0728176 NA 0.3846771 0.3060872 0.0728176 1.892955e-01 1.892955e-01 1.892955e-01 1.892955e-01 0.1206553 NA 0.2350231 0.2158776 0.1206553 0.5133764 0.5133764 0.5133764 0.5133764 2.8878505 31 NA low low low
7NC 106074 310952 0 1628191 167163 0.0479457 0.1405509 0.0000000 0.7359455 0.0755580 89.34399 NA 18.52427 73.48726 89.34399 4.8230787 4.8230787 4.8230787 4.8230787 0.5526142 0.0531233 NA 0.5233799 0.0517752 0.0531233 1.015004e-01 1.015004e-01 1.015004e-01 1.015004e-01 0.1534592 NA 0.2455539 0.2388339 0.1534592 0.6249511 0.6249511 0.6249511 0.6249511 3.4444444 40 NA low low high
7RA 72918 530505 441179 772709 466302 0.0319310 0.2323095 0.1931934 0.3383713 0.2041948 33.35958 39.17649 17.01392 25.12197 33.35958 0.5936880 1.9607221 1.2908975 4.2633357 0.2951535 0.2251296 0.1866754 0.4992183 0.1908445 0.1866754 3.282281e-01 4.509642e-01 6.003919e-01 8.248997e-01 0.3395623 0.3529855 0.3949139 0.3654755 0.3395623 0.4540214 0.8598389 0.9259906 1.7536680 1.1153846 8 high low low low
7RB 23710 284804 735372 167172 188017 0.0169469 0.2035659 0.5256130 0.1194875 0.1343866 34.39737 14.99604 27.35225 37.09141 14.99604 0.8122495 1.2575703 1.1663616 1.8058266 0.0670603 0.1917334 0.6787431 0.1155631 0.1097005 0.1917334 2.413848e-01 1.659124e+00 1.095895e+00 7.532482e+00 0.5446202 0.5816950 0.5614016 0.5250031 0.5446202 0.4764429 0.9701081 0.9853194 2.0062559 3.9615385 20 high high low low
7RD 50862 403704 0 886443 240078 0.0321690 0.2553332 0.0000000 0.5606542 0.1518436 44.13770 NA 14.33429 38.10895 44.13770 3.0791694 3.0791694 3.0791694 3.0791694 0.3013841 0.2386457 NA 0.6842436 0.1745901 0.2386457 3.487731e-01 3.487731e-01 3.487731e-01 3.487731e-01 0.3192517 NA 0.4240363 0.3851937 0.3192517 0.7528877 0.7528877 0.7528877 0.7528877 0.9966997 20 NA low low low
8LD 7860 82972 259 508 48175 0.0562336 0.5936154 0.0018530 0.0036344 0.3446635 16.36623 350.73208 292.95079 22.72937 16.36623 0.0254259 0.0558668 0.5703093 1.2531057 0.3456743 0.6508906 0.0008906 0.0035623 0.3871501 0.0008906 1.461714e+02 1.827143e+02 1.463714e+02 1.829643e+02 0.6488849 0.1488781 0.1657167 0.5950220 0.1488781 2.0626057 3.9156287 2.5358434 4.8140179 0.2418033 6 high high high high
8RCL 4518 141557 0 63579 46047 0.0176691 0.5536036 0.0000000 0.2486459 0.1800814 15.12966 NA 36.87314 33.12931 15.12966 0.4103167 0.4103167 0.4103167 0.4103167 0.1051350 0.6186366 NA 0.3669765 0.2673749 0.6186366 1.685766e+00 1.685766e+00 1.685766e+00 1.685766e+00 0.5765175 NA 0.5382005 0.4295692 0.5765175 1.0711947 1.0711947 1.0711947 1.0711947 12.0000000 12 high high high low
8RCU 83679 750075 0 1475968 599152 0.0287668 0.2578575 0.0000000 0.5074018 0.2059739 50.00101 NA 24.56317 37.07321 50.00101 2.0356094 2.0356094 2.0356094 2.0356094 0.2746806 0.2347423 NA 0.4697355 0.2889853 0.2347423 4.997329e-01 4.997329e-01 4.997329e-01 4.997329e-01 0.3676127 NA 0.3679267 0.4564061 0.3676127 0.9991463 0.9991463 0.9991463 0.9991463 5.5142857 27 NA high low low

Firstly, we can compare the different metrics to determine how similar or different they are: They generally associate very well, with knn and IF being the best (they are directly related metrics)

par(mfrow=c(2,2))
a1=cor.test(df.Spatial$knn.EpCAM, df.Spatial$MH.EpCAM)
plot(df.Spatial$knn.EpCAM, df.Spatial$MH.EpCAM, xlab="knn", ylab="MH",
     main=sprintf("cor:%s, p:%s", round(a1$estimate,2), round(a1$p.value, 2)))
a1=cor.test(df.Spatial$IF.EpCAM, df.Spatial$MH.EpCAM)
plot(df.Spatial$MH.EpCAM, df.Spatial$IF.EpCAM,  xlab="MH", ylab="IF",
      main=sprintf("cor:%s, p:%s", round(a1$estimate,2), round(a1$p.value, 2)))
a1=cor.test(df.Spatial$knn.EpCAM, df.Spatial$MH.EpCAM)
plot( df.Spatial$IF.EpCAM,df.Spatial$knn.EpCAM,  xlab="IF", ylab="knn",
       main=sprintf("cor:%s, p:%s", round(a1$estimate,2), round(a1$p.value, 2)))

We can also make comparisons directly against growth rate and tumor size

par(mfrow=c(2,3))

cNames=c("knn.EpCAM", "IF.EpCAM", "MH.EpCAM")

for (i in cNames){
t1=cor.test(df.Spatial[, i],df.Spatial$GrowthRate, use="complete")
plot(df.Spatial[, i]~df.Spatial$GrowthRate, ylab=i, xlab="Growth Rate", main=sprintf("cor:%s p:%s", round(t1$estimate, 2), round(t1$p.value,2)))

t1=cor.test(df.Spatial[ ,i],df.Spatial$TumSize, use="complete")
plot(df.Spatial[, i]~as.numeric(as.character(df.Spatial$TumSize)), ylab=i, xlab="Tumor size", main=sprintf("cor:%s p:%s", round(t1$estimate, 2), round(t1$p.value, 2)))
}

5.5 Distances to “unclassified cells”

We noted that the proportion of Unclassified cells seemed to be different between the treatments. Assess here whether the MH index for this cell type is associated with growth or treatment here:

varsearch="Unclass"

#MHmeltsumm=melt(WSIMHsetup, id.vars=c("gridsize", "Ntiles"))

MHmelt=melt(WSIMH, measure.vars="MH.mean")
MHmelt$SpatialManual=Cdata$Pathologist.CD8.Spatial[match(MHmelt$L1, gsub("_", "", Cdata$TumorID))]
#MHmeltsumm$SpatialManual=Cdata$Pathologist.CD8.Spatial[match(MHmeltsumm$L1, gsub("_", "", Cdata$TumorID))]
MHmelt$Treatment=Cdata$Treatment[match(MHmelt$L1, gsub("_", "", Cdata$TumorID))]
MHmelt$Growth=Cdata$Tumor.Growth[match(MHmelt$L1, gsub("_", "", Cdata$TumorID))]
#MHmelt$Growth=Cdata$Tumor.growth.status[match(MHmelt$L1, gsub("_", "", Cdata$TumorID))]
#MHmelt$Growth[grep("no data", MHmelt$Growth)]="no data"

A1=MHmelt[MHmelt$Var2==varsearch| MHmelt$Var1==varsearch, ]
A1$label=A1$L1
A1$label[which(A1$gridsize!=500)]=NA
A1$Var1=ifelse(A1$Var1==varsearch, as.character(A1$Var2), as.character(A1$Var1))

A2=MHmelt[(MHmelt$Var2==varsearch|MHmelt$Var1==varsearch) & MHmelt$gridsize==250 & MHmelt$Var1%in%c("EpCAM", "SMA"), ]
A3=MHmelt[(MHmelt$Var2==varsearch|MHmelt$Var1==varsearch) & MHmelt$gridsize==250, ]
A3$CD8frac=WSIvalFracs[ 1, match(A3$L1, colnames(WSIvals))]

A3$Var1=ifelse(A3$Var1==varsearch, as.character(A3$Var2), as.character(A3$Var1))

MHTempSumm=A3 #MHmelt[which(MHmelt$gridsize==300 & MHmelt$Var2==varsearch), ]
MHreshape=acast(A3[ ,c("Var1", "value", "L1")], L1~Var1, value.var="value" )
MHreshape=data.frame(MHreshape)

MHreshape$Treatment=A3$Treatment[match(rownames(MHreshape), A3$L1)]
MHreshape$Growth=A3$Growth[match(rownames(MHreshape), A3$L1)]
MHreshape$Infil=A3$SpatialManual[match(rownames(MHreshape), A3$L1)]

colnames(MHreshape)=paste(varsearch, colnames(MHreshape), sep=".")
DT::datatable(MHreshape, rownames=F, class='cell-border stripe',
          extensions="Buttons", options=list(dom="Bfrtip", buttons=c('csv', 'excel'), scrollX=T))
#write.csv(MHreshape, file=sprintf("outputs/%s_MH_comparisons_gridsize250.csv", varsearch))

# pdf(sprintf("rslt/WSI-analysis/MHplots_spatial_growth_treatment_%s_celltype_%s.pdf", varsearch, Sys.Date()), height=7, width=12)
# 
 p<-ggplot(A1, aes(x=gridsize, y=value, col=Growth))+facet_grid(~Var1)+geom_line(aes(group=L1))+ylab("grid size")+ylab("Morisita Horn index")+ggtitle(sprintf("MH index with %s: growth", varsearch))
 print(p)

 
pvals=sapply(unique(A3$Var1), function(x) wilcox.test(A3$value[A3$Var1==x & A3$Growth=="growing"],A3$value[A3$Var1==x & A3$Growth=="stable"])$p.value)

ann_text=data.frame(Growth="stable", y=0.8, label=round(pvals,2), Var1=unique(A3$Var1))
 
ggplot(A3, aes(x=Growth, y=value, col=Growth))+facet_grid(~Var1)+geom_boxplot()+ylab("grid size")+ylab("Morisita Horn index")+ggtitle(sprintf("MH index with %s: growth", varsearch))+
  geom_text(data=ann_text, mapping = aes(x=Growth , y=0.75, label=label))

 

pval2=matrix(NA, nrow=3, ncol=4)
colnames(pval2)=unique(A3$Var1)
rownames(pval2)=TreatV[1:3]

for (i in 1:3){
pval2[i, ]=sapply(unique(A3$Var1), function(x) wilcox.test(A3$value[A3$Var1==x & A3$Treatment==TreatV[i]], A3$value[A3$Var1==x & A3$Treatment=="Vehicle"])$p.value)
}

pmelt=melt(pval2)
colnames(pmelt)=c("Treatment", "Var1", "label")
pmelt$label=round(pmelt$label, 2)
pmelt$value=0.8

p<-ggplot(A1, aes(x=gridsize, y=value, col=Treatment))+facet_grid(~Var1)+geom_line(aes(group=L1))+ylab("grid size")+ylab("Morisita Horn index")+ggtitle(sprintf("MH index with %s: growth", varsearch))
print(p)


ggplot(A3, aes(x=Treatment, y=value, col=Treatment))+facet_grid(~Var1)+geom_boxplot()+ylab("grid size")+ylab("Morisita Horn index")+ggtitle(sprintf("MH index with %s: growth", varsearch))+geom_text(data=pmelt, mapping=aes(x=Treatment, y=0.8, label=label))

We see here that epcam-unclass mixing could also be associated with tumor growth - this could be attributed to a hight number of other immune related cells in this fraction.

Note that we can change the entry cell type to obtain the same plots for other reference cells of interest.