Example of bioinformatics code - smangul1/online.bioinformatics GitHub Wiki
Count number of reads in the BAM file with multi-mapped reads
import csv
import sys
import argparse
from collections import Counter
import numpy
import pysam
import random
import os
ap = argparse.ArgumentParser()
ap.add_argument('inBam', help='inBam')
ap.add_argument('out', help='out txt file')
args = ap.parse_args()
filename = os.path.basename(args.inBam)
sample=os.path.splitext(filename)[0]
print sample
samfile = pysam.AlignmentFile(args.inBam) # Change me
fileOut=open(args.out,"w")
n_unique_pe=0
n_unm1_pe=0
n_unm2_pe=0
n_unm_both_pe=0
for read in samfile.fetch(until_eof=True):
if read.is_read1:
if not read.is_secondary:
n_unique_pe+=1
#unmapped reads
if read.is_unmapped and read.mate_is_unmapped:
n_unm_both_pe+=1
elif read.is_unmapped and not read.mate_is_unmapped:
n_unm1_pe+=1
elif not read.is_unmapped and read.mate_is_unmapped:
n_unm2_pe+=1
samfile.close()
fileOut.write("sample,n_unique_pe,n_unm_both_pe,n_unm1_pe,n_unm2_pe\n")
fileOut.write(sample+","+str(n_unique_pe)+","+str(n_unm_both_pe)+","+str(n_unm1_pe)+","+str(n_unm2_pe))
fileOut.write("\n")
Randomly subsample reads from the BAM file
import csv
import sys
import argparse
from collections import Counter
import numpy
import pysam
import random
ap = argparse.ArgumentParser()
ap.add_argument('inBam', help='inBam')
ap.add_argument('outBam', help='outBam')
ap.add_argument('fraction', help='fraction')
args = ap.parse_args()
samfile = pysam.AlignmentFile(args.inBam) # Change me
#output = pysam.AlignmentFile(args.outBam, "wb", template=bam) # Change me
fraction = float(args.fraction)
reads_read1_primary_alignment=set()
for read in samfile.fetch():
if read.is_read1:
if not read.is_secondary or read.is_unmapped:
if random.random() < fraction:
reads_read1_primary_alignment.add(read.query_name)
samfile.close()
print "Number of PE reads selected", len(reads_read1_primary_alignment)
samfile = pysam.AlignmentFile(args.inBam) # Change me
output = pysam.AlignmentFile(args.outBam, "wb", template=samfile) # Change me
for read in samfile.fetch():
if read.query_name in reads_read1_primary_alignment:
output.write(read)
samfile.close()
output.close()
print "done!"