# Quick script to download a suerat object and then pseudobulk
# the sample RNA counts by cell type or tissue.
# @author Diego

library(Seurat); library(tidyverse); library(Matrix); set.seed(123)

# download extra meta data file to find out cell type
meta <- readRDS(url(paste0('https://krishna.gs.washington.edu/content/',
    'members/DEAP_website/public/cell_to_annotations/rna_meta.rds')))

# download seurat object of interest
# this one is the subsampled data set
data <- readRDS(url(paste0('https://krishna.gs.washington.edu/content/',
    'members/DEAP_website/public/RNA/update/seurat_objects/',
    '14912_29824_subsampled_large_checkpoint_endpoint.Rds')))
# # this one is the full data set - timed out so might need to just download
data <- readRDS(url(paste0('https://krishna.gs.washington.edu/',
    'content/members/DEAP_website/public/RNA/',
    'update/seurat_objects/main.Rds')))
# data <- readRDS('main.Rds') # i just downloaded because it was slow.

# add germ layer to data meta info
# instead of germ layer could use manual_annot which has the finest
# grain cell type annotation.
data$germ <- meta[match(colnames(data), meta$cell),]$germ_layer
data$annot <- meta[match(colnames(data), meta$cell),]$manual_annot

# Make a mask - this helps for quickly summing up reads per cells
# that have a specific annotation.
germ_mask <- Matrix::sparse.model.matrix(~ -1 + germ, data=data@meta.data)
annot_mask <- Matrix::sparse.model.matrix(~ -1 + annot, data=data@meta.data)

# Make pseudobulk matrix. FYI, the empty value represents
# cells that didn't have a germ layer annotation.
germ_counts <- data@assays$RNA@counts %*% germ_mask
annot_counts <- data@assays$RNA@counts %*% annot_mask