wget https://cpan.metacpan.org/authors/id/H/HA/HAARG/local-lib-2.000024.tar.gz tar xf local-lib-2.000024.tar.gz cd local-lib-2.000024 perl Makefile.PL --bootstrap=~/opt make test && make install
wget https://cpan.metacpan.org/authors/id/M/MI/MIYAGAWA/App-cpanminus-1.7043.tar.gz tar xf App-cpanminus-1.7043.tar.gz cd App-cpanminus-1.7043 perl Makefile.PL make test && make install
cd ~/src wget -4 http://www.cpan.org/src/5.0/perl-5.26.1.tar.gz tar xf perl-5.26.1.tar.gz cd perl-5.26.1 ./Configure -des -Dprefix=$HOME/opt/sysoft/perl-5.26.1 make test make install
wget https://downloads.pacbcloud.com/public/data/git-sym/ecoli.m140913_050931_42139_c100713652400000001823152404301535_s1_p0.subreads.tar.gz tar -xvzf ecoli.m140913_050931_42139_c100713652400000001823152404301535_s1_p0.subreads.tar.gz
# read in matrix data using the Matrix package indata <- Matrix::readMM("filtered_peak_bc_matrix/matrix.mtx") # binarize the matrix indata@x[indata@x > 0] <- 1
加载cell元数据
1 2 3 4
# format cell info cellinfo <- read.table("filtered_peak_bc_matrix/barcodes.tsv") row.names(cellinfo) <- cellinfo$V1 names(cellinfo) <- "cells"
# remove all but the first exons per transcript pos <- pos[!duplicated(pos$transcript),] # make a 1 base pair marker of the TSS pos$end <- pos$start + 1
neg <- subset(gene_anno, strand == "-") neg <- neg[order(neg$start, decreasing = TRUE),] # remove all but the first exons per transcript neg <- neg[!duplicated(neg$transcript),] neg$start <- neg$end - 1
# if you had two datasets to normalize, you would pass both: # num_genes should then include all cells from both sets unnorm_ga2 <- unnorm_ga cicero_gene_activities <- normalize_gene_activities(list(unnorm_ga, unnorm_ga2), num_genes)
# First, assign a column in the pData table to umap pseudotime pData(input_cds_lin)$Pseudotime <- pseudotime(input_cds_lin) pData(input_cds_lin)$cell_subtype <- cut(pseudotime(input_cds_lin), 10) binned_input_lin <- aggregate_by_cell_bin(input_cds_lin, "cell_subtype")
之后用Monocle3的fit_models函数寻找差异开发的区域。
1 2 3 4 5 6 7 8 9
# For speed, run fit_models on 1000 randomly chosen genes set.seed(1000) acc_fits <- fit_models(binned_input_lin[sample(1:nrow(fData(binned_input_lin)), 1000),], model_formula_str = "~Pseudotime + num_genes_expressed" ) fit_coefs <- coefficient_table(acc_fits)
# Subset out the differentially accessible sites with respect to Pseudotime pseudotime_terms <- subset(fit_coefs, term == "Pseudotime" & q_value < .05) head(pseudotime_terms)
> str(mat) Formal class'dgCMatrix'[package "Matrix"] with 6 slots ..@ i : int(0) ..@ p : int [1:8190]0000000000 ... ..@ Dim : int [1:2]3201278189 ..@ Dimnames:List of 2 .. ..$:NULL .. ..$:NULL ..@ x : num(0) ..@ factors :list()