-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmake-fig2.R
More file actions
232 lines (185 loc) · 9.05 KB
/
make-fig2.R
File metadata and controls
232 lines (185 loc) · 9.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
suppressPackageStartupMessages(library(pacman))
suppressPackageStartupMessages(p_load(Seurat))
suppressPackageStartupMessages(p_load(dplyr))
suppressPackageStartupMessages(p_load(plyr))
suppressPackageStartupMessages(p_load(rjson))
suppressPackageStartupMessages(p_load(reshape))
suppressPackageStartupMessages(p_load(geojsonio))
suppressPackageStartupMessages(p_load(cowplot))
suppressPackageStartupMessages(p_load(ggplot2))
input.dir <- "/home/whitebr/pdxnet/"
input.dir <- "."
hovernet_centroid_file <- paste0(input.dir, "/pdmr-hovernet-centroids.csv")
json_cell_info_file <- paste0(input.dir, "/type_info.json")
# This file is generated from pdmr-target-cell-type-cnts-area
# But, we should be able to get it from the centroid info
cell.type.cnts.area <- read.table("pdmr-target-cell-type-cnts-area.csv", sep=",", header=TRUE)
mask.area <- read.table("pdmr-target-mask-area.csv", sep=",", header=TRUE)
target_image_id <- 10256
hne_img_file <- paste0("/Users/whitebr/work/jax/HistoQC/old-histoqcs/histoqc_output_pdmr_unmarked_v2.1-no-blur-no-fat-no-cover-no-pen/", target_image_id, ".svs/", target_image_id, ".svs_thumb.png")
# hne_img_file <- paste0("/home/whitebr/pdxnet/", target_image_id, ".svs_thumb.png")
geojson_file <- paste0(input.dir, "/", target_image_id, ".geojson")
hovernet_centroids <- read.table(hovernet_centroid_file, sep=",", header=TRUE)
# We often upscale images to 40x to be run through Hover-Net (as it was trained on 40x).
# Downsample to the 20x resolution of the PDMR images.
scale.factor <- 2
# Convert the centroids to coordinates
tile_size <- 512
hovernet_centroids$tile_x <- ( tile_size * floor(hovernet_centroids$centroid_x / scale.factor / tile_size) ) + ( tile_size / 2 )
hovernet_centroids$tile_y <- ( tile_size * floor(hovernet_centroids$centroid_y / scale.factor / tile_size) ) + ( tile_size / 2 )
# Read in the labels for the cell types
cell_info_tbl = fromJSON(file = json_cell_info_file)
cell_type = as.numeric(names(cell_info_tbl))
cell_type_label = as.vector(unlist(lapply(cell_info_tbl, function(x) x[[1]])))
cell_info_tbl <- data.frame(cell_type_label = cell_type_label, cell_type = cell_type)
hovernet_centroids <- merge(hovernet_centroids, cell_info_tbl)
# Count the number of cells of each type
hovernet_tiles <-
ddply(hovernet_centroids[, c("image_id", "tile_x", "tile_y", "cell_type_label")],
.variables = c("image_id", "tile_x", "tile_y"),
.parallel = FALSE,
.fun = function(df) {
tbl <- as.data.frame(table(df$cell_type_label))
colnames(tbl) <- c("cell_type_label", "count")
tbl
})
tile_mat <- cast(hovernet_tiles, formula = image_id + tile_x + tile_y ~ cell_type_label, values = "count", fill = 0)
# target_image_id <- hovernet_tiles[1, "image_id"]
hne_img <- png::readPNG(hne_img_file)
# grid::grid.raster(hne_img)
tile_tbl <- subset(tile_mat, image_id == target_image_id)
wsi_x <- max(subset(hovernet_centroids, image_id == hovernet_tiles[1, "image_id"])$centroid_x)
wsi_y <- max(subset(hovernet_centroids, image_id == hovernet_tiles[1, "image_id"])$centroid_y)
#wsi_x <- 17927
#wsi_y <- 13346
wsi_x <- 27887
wsi_y <- 16731
tile_tbl$tile_x <- tile_tbl$tile_x * ( ncol(hne_img) / wsi_x )
tile_tbl$tile_y <- tile_tbl$tile_y * ( nrow(hne_img) / wsi_y )
# tile_tbl$tile_x <- tile_tbl$tile_x * tile_size * scale.factor * ( 1.5 / 40 )
# tile_tbl$tile_y <- tile_tbl$tile_y * tile_size * scale.factor * ( 1.5 / 40 )
tile_names <- paste0(tile_tbl$tile_x, "_", tile_tbl$tile_y)
coordinates <- data.frame(imagecol = tile_tbl$tile_x, imagerow = tile_tbl$tile_y)
hov_cols <- c("connec", "inflam", "neopla", "necros", "nolabe", "no-neo")
hover_net_data <- as.matrix(tile_tbl[, hov_cols])
colnames(hover_net_data) <- hov_cols
row.names(coordinates) <- tile_names
row.names(hover_net_data) <- tile_names
# For some reason as.matrix is making hover_net_data a "cast_matrix", which Seurat does not understand
class(hover_net_data) <- c("matrix", "array")
radius=1/80 #just did trial and error here for dot size on plots
img <- new(
Class='VisiumV1',
image=hne_img, # an array with the image data
assay="Spatial",
key="image_",
scale.factors=scalefactors(spot=1,fiducial=1,hires=1,lowres=1),
coordinates=coordinates, #data.frame with rows as spots (tiles) and axes in the columns
spot.radius=radius # radius size, set by trial and error above
)
seurat_obj <- CreateSeuratObject(counts = t(hover_net_data), assay = "Spatial", project = "PDXNetPDMR")
seurat_obj@images$image <- img
# See bug that is causing distortion here:
# https://github.com/satijalab/seurat/issues/5141
coord <- GetTissueCoordinates(object = seurat_obj@images$image)
myratio <- (max(coord$imagerow) - min(coord$imagerow)) / (max(coord$imagecol) - min(coord$imagecol))
g.hov <- SpatialFeaturePlot(seurat_obj, features=c("neopla"), alpha=c(0.5,1),crop=FALSE) + theme(aspect.ratio = myratio)
png(paste0(target_image_id, "-hovernet-seurat.png"))
print(g.hov)
d <- dev.off()
library(geojsonio)
spdf <- geojson_read(geojson_file)
gsf <- geojson_sf(geojson_file)
library(geojsonio)
gjr <- geojson_read(geojson_file, what="sp")
# Note that ggplot2 fortify looks like this:
# ggplot2:::fortify.SpatialPolygons
#function (model, data, ...)
#{
# rbind_dfs(lapply(model@polygons, fortify))
#}
# write our own to include the classification
my.fortify <- function(model) {
dfs <- lapply(model@polygons, fortify)
for(i in 1:length(dfs)) {
dfs[[i]]$classification <- model@data$classification[i]
}
df <- do.call(rbind, dfs)
return(df)
}
# gjrf <- fortify(gjr)
gjrf <- my.fortify(gjr)
for(nm in c("Tumor", "Necrosis", "Stroma")) {
flag <- grepl(gjrf$classification, pattern=nm)
gjrf[flag,"classification"] <- nm
}
gjrf <- na.omit(gjrf)
# see https://randomjohn.github.io/r-geojson-gardens/
g.anno <- ggplot(data=gjrf) + geom_polygon(aes(x=long,y=lat,group=group, fill=classification)) + coord_fixed(ratio = 1) + scale_y_reverse()
g.anno <- g.anno + theme(axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank())
g.anno <- g.anno + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())
png(paste0(target_image_id, "-anno.png"))
print(g.anno)
d <- dev.off()
p_load(openxlsx)
metadata.dir <- "../metadata"
# metadata.files <- list.files(metadata.dir)
metadata.files <- list(
"PDMR" = "PDXNETimage_metadata_pdmr_standardized.xlsx",
"Wistar" = "PDXNETimage_metadata_wistar_standardized.xlsx",
"MDACC" = "Raso_Meric_Breast_Images_MDACC_standardized.xlsx",
"HCI" = "PDXNETimage_metadata_HCI_20x_standardized.xlsx",
"WUSTL" = "PDXNETimage_metadata_wustl_standardized.xlsx",
"WUSTL" = "WUPDTC_image_metadata.set2.20210715_standardized.xlsx",
"BCM" = "PDXNETimage_metadata_bcm_standardized.xlsx",
"MDACC" = "PDXNETimage_metadataMDACCbf_lung1_standardized.xlsx",
"MDACC" = "PDXimage032021_MDACClung2_BF_standardized V2.xlsx"
)
metadata <-
llply(metadata.files,
.fun = function(file) {
df <- read.xlsx(paste0(metadata.dir, "/", file), sheet = 1, startRow = 3)
df
})
metadata.all <- ldply(metadata)
colnames(metadata.all)[1] <- "dataset"
write.table(metadata.all, file="all-metadata.tsv", sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)
tmp <- merge(cell.type.cnts.area, mask.area)
tmp <- merge(tmp, cell_info_tbl)
# In this _old_ run from which these data were derived, hovernet
# was run at 20x
# the mask is 1.5x
scale.factor <- 20 / 1.5
scale.factor <- scale.factor * scale.factor
tmp$hovernet_pred <- as.numeric(tmp$cell_type_area) / ( scale.factor * as.numeric(tmp$mask_nz_pixels) )
tmp2 <- metadata[["PDMR"]][, c("Image.file.name", "Percent.tumor.content", "Percent.necrotic.content", "Percent.stromal.content", "Diagnosis")]
colnames(tmp2) <- c("image", "neopla", "necros", "connec", "Diagnosis")
tmp2$image <- as.character(tmp2$image)
tmp2$image <- paste0(tmp2$image, ".svs")
library(reshape2)
tmp2 <- melt(tmp2, id.vars=c("Diagnosis", "image"))
colnames(tmp2) <- c("Diagnosis", "image", "cell_type_label", "path_obs")
tmp <- merge(tmp, tmp2)
tmp$path_obs <- as.numeric(tmp$path_obs) / 100
# diagnoses <- diagnoses[!(diagnoses %in% c("Adenocarcinoma - rectum", "Colorectal cancer, NOS", "Lung adenocarcinoma"))]
diagnoses <- c("Squamous cell lung carcinoma")
tmp <- subset(tmp, Diagnosis %in% diagnoses)
tmp <- subset(tmp, cell_type_label == "neopla")
p_load(ggpubr)
g <- ggplot(data = tmp, aes(x = path_obs, y = hovernet_pred))
g <- g + geom_point()
g <- g + geom_smooth(method = "lm")
#g <- g + facet_wrap(Diagnosis ~ cell_type_label, scales = "free")
g <- g + xlab("Pathologist Assessment of Cell Type") + ylab("Hover-Net Prediction of Cell Type")
g <- g + stat_cor(aes(x = path_obs, y = hovernet_pred), method = "pearson", size=8)
g <- g + theme(axis.title=element_text(size=20))
g <- g + theme(axis.text.x=element_text(size=20))
g <- g + theme(axis.text.y=element_text(size=20))
g <- g + theme(strip.text=element_text(size=20))
g <- g + theme(legend.text=element_text(size=20))
g <- g + theme(legend.title=element_text(size=20))
g <- g + ggtitle("PDMR LUSC")
# g.all <- plot_grid(g.hov, g.anno, g, labels="AUTO")
png("pdmr-lusc-hov-vs-path.png")
print(g)
d <- dev.off()