-
Notifications
You must be signed in to change notification settings - Fork 0
Pipeline
Serghei Mangul edited this page Jul 22, 2017
·
22 revisions
python ~/code/miCoP/micop.py Sample_1A_short.txt Sample_1A_pg.txt
for f in run*sh ; do ./$f;done
python ~/code2/mergeCsvByField2.py fungi_nReads.txt ~/collab/MetaSUB-Inter-City-Challenge/nReads_metadata.txt fungi_nReads_metadata.txt sample
python ~/code2/mergeCsvByField2.py ameoba_nReads.txt ~/collab/MetaSUB-Inter-City-Challenge/nReads_metadata.txt ameoba_nReads_metadata.txt sample
python ~/code2/mergeCsvByField2.py virus_nReads.txt ~/collab/MetaSUB-Inter-City-Challenge/nReads_metadata.txt virus_nReads_metadata.txt sample
python ~/code2/mergeCsvByField2.py fungi_nReads_richess.txt ~/collab/MetaSUB-Inter-City-Challenge/nReads_metadata.txt fungi_richess_metadata.txt sample
- Total Reads
# Build nReads_metadata.txt with one "nReads,city,sample" row per sample.
# nReads is the FASTQ line count divided by 4 (a FASTQ record spans four lines).
# Each city is processed exactly once; the earlier version listed Sacramento
# twice, which appended every Sacramento sample to the file two times.
# The sample name is taken from the samples list directly instead of being
# recovered from the wc output with sed, where the unescaped '.' in
# 's/.fastq//' could match an unintended substring.
for city in NY Sacramento Boston; do
  while read -r sample; do
    wc -l < "data/${sample}.fastq" | awk -v city="$city" -v sample="$sample" '{print $1/4 "," city "," sample}'
  done < "${city}_samples.txt"
done > nReads_metadata.txt
- Fungi
# Write "count,sample" for every non-empty *sam file in the current directory
# (each line is presumably one read -- confirm the files carry no header).
# The first awk cuts the "__UniqueReads_Sorted.sam" suffix off the file name.
# When wc is given several files it appends a "<sum> total" row; that row is
# dropped here so it does not end up in the table as a sample named "total".
wc -l *sam | awk -F "__UniqueReads_Sorted.sam" '{print $1}' | awk '$1 != 0 && $2 != "total" {print $1","$2}' >../metasub_analysis/fungi_nReads.txt
- Amoeba
# Write "count,sample" for every non-empty *sam file in the current directory
# (each line is presumably one read -- confirm the files carry no header).
# The first awk cuts the "__UniqueReads_Sorted.sam" suffix off the file name.
# When wc is given several files it appends a "<sum> total" row; that row is
# dropped here so it does not end up in the table as a sample named "total".
wc -l *sam | awk -F "__UniqueReads_Sorted.sam" '{print $1}' | awk '$1 != 0 && $2 != "total" {print $1","$2}' >../../metasub_analysis/ameoba_nReads.txt
- Virus
# Write "count,sample" for every non-empty *sam file in the current directory
# (each line is presumably one read -- confirm the files carry no header).
# The first awk cuts the "__UniqueReads_Sorted.sam" suffix off the file name.
# When wc is given several files it appends a "<sum> total" row; that row is
# dropped here so it does not end up in the table as a sample named "total".
wc -l *sam | awk -F "__UniqueReads_Sorted.sam" '{print $1}' | awk '$1 != 0 && $2 != "total" {print $1","$2}' >~/scratch/metasub_analysis/virus_nReads.txt
/u/home/s/serghei/collab/MetaSUB-Inter-City-Challenge/virus
while read line;do sed 's/SRR3555059/'${line}'/g' run_example.sh >run_${line}.sh;done<../samples.txt
/u/home/s/serghei/scratch/metasub
~/code/cmd/submit_QSUB_array.sh 16 5
while read line;do sed 's/SRR3555059/'${line}'/g' run_example.sh >run_${line}.sh;done<samples.txt
# Keep only alignment records whose first column (the read name) occurs exactly
# once in the BAM, then sort the survivors by the third tab-separated column.
# awk remembers one record per read name plus a hit counter; a name seen once
# has exactly one record, so remembering the latest one is sufficient.
samtools view SRR3555059.bam \
  | awk -F'\t' '{ hits[$1]++; rec[$1] = $0 } END { for (name in hits) if (hits[name] == 1) print rec[name] }' \
  | sort -t$'\t' -k 3,3 >SRR3555059__UniqueReads_Sorted.sam
python3 CoveragePlot.py SRR3555059__UniqueReads_Sorted.sam /u/home/s/serghei/scratch/metasub/fungi_RefList.txt 100 1 test
grep Malassezia SRR3555059__UniqueReads_Sorted.sam | awk '{print $6,$12}' | sed -E 's/[0-9]+S//g' | grep -v I | grep -v D | sed 's/M//' | sed 's/NM:i://' | awk '{print $1","$2}'
- histograms for x and y
# Scatter plot of `ed` against `rl` from `data`, drawn with the classic theme.
p <- ggplot(data, aes(x = rl, y = ed)) +
  geom_point() +
  theme_classic()

# Attach a histogram of each variable to the margins of the scatter plot.
ggExtra::ggMarginal(p, type = "histogram")
- plot for each city
# Amoeba read count against total read count, both scaled to millions of reads.
p1 <- ggplot(data, aes(x = nReads / 1000000, y = nReads_ameoba / 1000000))

# One point per row, colored by city with a fixed three-color palette, on a
# blank panel with black axis lines and enlarged text.
# NOTE(review): `size = 1` sits inside aes(), so it is treated as a mapped
# aesthetic (and gets a legend entry) rather than a fixed point size --
# confirm whether `geom_point(aes(...), size = 1)` was intended.
p1 +
  geom_point(aes(color = factor(city), size = 1)) +
  scale_color_manual(values = c("orange", "purple", "red")) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    text = element_text(size = 20)
  )