Skip to content

Pipeline

Serghei Mangul edited this page Jul 22, 2017 · 22 revisions

MiCoP

# Run micop.py with the two Sample_1A files as arguments.
python ~/code/miCoP/micop.py Sample_1A_short.txt Sample_1A_pg.txt
# Execute every script matching run*sh in the current directory, one after another.
for script in run*sh
do
  ./$script
done

Metadata

# Merge each read-count table with the shared nReads_metadata.txt table.
# NOTE(review): the trailing `sample` argument is presumably the join column — confirm against mergeCsvByField2.py.
metadata=~/collab/MetaSUB-Inter-City-Challenge/nReads_metadata.txt
for taxon in fungi ameoba virus; do
  python ~/code2/mergeCsvByField2.py ${taxon}_nReads.txt "$metadata" ${taxon}_nReads_metadata.txt sample
done
# The fungal richness table uses a different file-name pattern, so it is merged on its own.
python ~/code2/mergeCsvByField2.py fungi_nReads_richess.txt "$metadata" fungi_richess_metadata.txt sample

Number of reads

  • Total Reads
# Build nReads_metadata.txt with one "<read count>,<city>,<sample>" row per sample.
# The read count is the FASTQ line count divided by 4.
# Each city's sample list is processed exactly once; processing a list twice
# would append duplicate rows for every sample of that city.
for city in NY Sacramento Boston; do
  while read -r line; do
    # wc prints "<lines> data/<sample>.fastq"; the sed stages strip the directory,
    # the extension, and the space awk inserts before the file name.
    wc -l "data/${line}.fastq" | awk -v city="$city" '{print $1/4","city",",$2}' | sed 's/data\///' | sed 's/\.fastq//' | sed 's/ //'
  done <"${city}_samples.txt"
done >nReads_metadata.txt

- Fungi
# Write "<line count>,<sample>" for every *sam file, where <sample> is the file
# name with the __UniqueReads_Sorted.sam suffix removed. Files with zero lines
# are skipped, as is the "total" summary row wc prints when several files match.
wc -l *sam | awk -F "__UniqueReads_Sorted.sam" '{print $1}' | awk '$1!=0 && $2!="total" {print $1","$2}'  >../metasub_analysis/fungi_nReads.txt
- Amoeba
# Write "<line count>,<sample>" for every *sam file, where <sample> is the file
# name with the __UniqueReads_Sorted.sam suffix removed. Files with zero lines
# are skipped, as is the "total" summary row wc prints when several files match.
wc -l *sam | awk -F "__UniqueReads_Sorted.sam" '{print $1}' | awk '$1!=0 && $2!="total" {print $1","$2}'  >../../metasub_analysis/ameoba_nReads.txt
- Virus
# Write "<line count>,<sample>" for every *sam file, where <sample> is the file
# name with the __UniqueReads_Sorted.sam suffix removed. Files with zero lines
# are skipped, as is the "total" summary row wc prints when several files match.
wc -l *sam | awk -F "__UniqueReads_Sorted.sam" '{print $1}' | awk '$1!=0 && $2!="total" {print $1","$2}'  >~/scratch/metasub_analysis/virus_nReads.txt

Virus

/u/home/s/serghei/collab/MetaSUB-Inter-City-Challenge/virus

Microsporidia

# Generate one run_<sample>.sh per sample ID listed in ../samples.txt by replacing
# the placeholder ID SRR3555059 in run_example.sh with that sample ID.
# Expansions are quoted so an ID is never word-split or glob-expanded; blank
# lines are skipped and a final line without a trailing newline is still used.
while read -r line || [ -n "$line" ]; do
  [ -n "$line" ] || continue
  sed "s/SRR3555059/${line}/g" run_example.sh >"run_${line}.sh"
done <../samples.txt

Fungi

/u/home/s/serghei/scratch/metasub
# Invoke the submit_QSUB_array.sh helper (presumably submits the run scripts as a job array).
# NOTE(review): the meaning of the arguments 16 and 5 is not shown on this page — confirm against the helper script.
~/code/cmd/submit_QSUB_array.sh 16 5
# Generate one run_<sample>.sh per sample ID listed in samples.txt by replacing
# the placeholder ID SRR3555059 in run_example.sh with that sample ID.
# Expansions are quoted so an ID is never word-split or glob-expanded; blank
# lines are skipped and a final line without a trailing newline is still used.
while read -r line || [ -n "$line" ]; do
  [ -n "$line" ] || continue
  sed "s/SRR3555059/${line}/g" run_example.sh >"run_${line}.sh"
done <samples.txt
# Keep only alignments whose read name (column 1) occurs exactly once in the BAM,
# then sort the surviving records by column 3 (the reference name in SAM).
samtools view SRR3555059.bam \
  | awk -F '\t' '{ seen[$1]++; rec[$1] = $0 } END { for (name in seen) if (seen[name] == 1) print rec[name] }' \
  | sort -t$'\t' -k 3,3 >SRR3555059__UniqueReads_Sorted.sam

# Run CoveragePlot.py on the unique-read SAM file together with the fungal reference list.
# NOTE(review): the trailing arguments (100, 1, test) are not explained on this page — confirm their meaning against CoveragePlot.py.
python3 CoveragePlot.py SRR3555059__UniqueReads_Sorted.sam /u/home/s/serghei/scratch/metasub/fungi_RefList.txt 100 1 test

# For SAM records mentioning Malassezia, print "<matched length>,<NM value>" pairs:
#   awk '{print $6,$12}'  keeps column 6 (CIGAR) and column 12 (expected to be the NM:i: tag)
#   sed -E 's/[0-9]+S//g' drops soft-clip operations (e.g. 5S) from the CIGAR
#   grep -v I / grep -v D discards lines containing I or D (CIGAR insertions/deletions)
#   sed 's/M//'           removes the first M, leaving the match length as a bare number
#   sed 's/NM:i://'       removes the tag prefix, leaving the bare NM value
#   final awk             joins the two numbers with a comma
# NOTE(review): assumes NM:i: is always the first optional field (column 12); any other
# tag in that position would pass through unstripped or be dropped by grep -v D — confirm
# for the aligner used.
grep Malassezia SRR3555059__UniqueReads_Sorted.sam | awk '{print $6,$12}' | sed -E 's/[0-9]+S//g' | grep -v I | grep -v D | sed 's/M//' | sed 's/NM:i://' | awk '{print $1","$2}'

R code

  • histograms for x and y
# Scatter plot of `ed` against `rl`, with a marginal histogram on each axis.
p <- ggplot(data, aes(x = rl, y = ed)) +
  geom_point() +
  theme_classic()
ggExtra::ggMarginal(p, type = "histogram")
  • plot for each city
# Scatter plot of nReads_ameoba against nReads (both divided by 1e6), with
# points coloured by city.
p1 <- ggplot(data, aes(x = nReads / 1000000, y = nReads_ameoba / 1000000))
p1 +
  # The point size is a constant, so it is set outside aes(); inside aes() it is
  # treated as a mapped variable, which adds a spurious "size" legend and lets
  # the size scale pick the drawn size instead of the literal value.
  # NOTE(review): raise `size` here if larger points are wanted.
  geom_point(aes(color = factor(city)), size = 1) +
  scale_color_manual(values = c("orange", "purple", "red")) +
  # Blank panel with black axis lines and 20-pt text.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    text = element_text(size = 20)
  )
Clone this wiki locally