UC Davis TAMA Tutorial 2 - GenomeRIK/workshop_tutorials GitHub Wiki
Using FLNC for mapping and TAMA Collapse
change directory
cd 1_data
Make a bash script. This converts the FLNC BAM file into a FASTA file.
run_flnc_fasta.sh
fill in
#!/bin/sh
# run_flnc_fasta.sh - convert the FLNC BAM file into a FASTA file.
#
# Usage: sh run_flnc_fasta.sh
#
# Reads  : alz.flnc.bam (in the current directory)
# Writes : alz.flnc.fa  (in the current directory)

# Stop at the first failing command or unset variable.
set -eu

bam='alz.flnc.bam'

# Swap only the trailing ".bam" for ".fa" (alz.flnc.bam -> alz.flnc.fa), so a
# "bam" appearing elsewhere in the file name is left untouched.
fasta="${bam%.bam}.fa"

bamtools convert -format fasta -in "${bam}" > "${fasta}"
run bash script
sh run_flnc_fasta.sh
change directory
cd ../2_map
Make a bash script. This is for running Minimap2.
run_minimap2.sh
fill in
#!/bin/sh
# run_minimap2.sh - map the FLNC reads to the hg38 reference with Minimap2.
#
# Usage: sh run_minimap2.sh
#
# Writes : mm2_alz_flnc_hg38.sam (in the current directory)

# Stop at the first failing command or unset variable.
set -eu

ref='/share/workshop/isoseq_workshop/rkuo/1_data/hg38.fa'
query='/share/workshop/isoseq_workshop/rkuo/1_data/alz.flnc.fa'
outfile='mm2_alz_flnc_hg38.sam'

# Flag notes (per the minimap2 manual; confirm against your installed version):
#   --secondary=no  do not report secondary alignments
#   -ax splice      splice-aware preset with SAM output
#   -uf             treat reads as being on the forward transcript strand
#   -C5             cost applied to non-canonical splice sites
#   -t 8            number of threads
minimap2 --secondary=no -ax splice -uf -C5 -t 8 "${ref}" "${query}" > "${outfile}"
run script
sh run_minimap2.sh
Make a bash script. This converts the SAM file to a BAM file.
run_sam_to_bam.sh
fill in
#!/bin/sh
# run_sam_to_bam.sh - convert the Minimap2 SAM output into a BAM file.
#
# Usage: sh run_sam_to_bam.sh
#
# Reads  : mm2_alz_flnc_hg38.sam (in the current directory)
# Writes : mm2_alz_flnc_hg38.bam (in the current directory)

# Stop at the first failing command or unset variable.
set -eu

file='mm2_alz_flnc_hg38.sam'
outfile='mm2_alz_flnc_hg38.bam'

# -b writes BAM; -S marks the input as SAM (kept for older samtools releases;
# newer ones detect the input format themselves).
samtools view -bS "${file}" > "${outfile}"
Make a bash script. This sorts the BAM file.
run_sort.sh
fill in
#!/bin/sh
# run_sort.sh - sort the BAM file produced by run_sam_to_bam.sh.
#
# Usage: sh run_sort.sh
#
# Reads  : mm2_alz_flnc_hg38.bam      (in the current directory)
# Writes : mm2_alz_flnc_hg38_sort.bam (in the current directory)

# Stop at the first failing command or unset variable.
set -eu

filename='mm2_alz_flnc_hg38.bam'
# This is the full output file name handed to "-o", not a name prefix.
outfile='mm2_alz_flnc_hg38_sort.bam'

samtools sort -o "${outfile}" "${filename}"
change directory
cd ../3_collapse/2_tc_flnc_nolde_nc
make bash script
run_tama_collapse.sh
fill in
#!/bin/sh
# run_tama_collapse.sh - run TAMA Collapse on the sorted FLNC alignments.
#
# Usage: sh run_tama_collapse.sh
#
# Output files are named after ${prefix}, i.e.
# tc_nc_flnc_nolde_mm2_alz_flnc_hg38_sort*.

# Stop at the first failing command or unset variable.
set -eu

# Location of the TAMA scripts and the script to run.
spath='/share/workshop/isoseq_workshop/rkuo/tama/'
pscript='tama_collapse.py'

# Value passed to -x.
capflag='no_cap'

# Sorted BAM from the mapping step and the reference genome FASTA.
fpath='/share/workshop/isoseq_workshop/rkuo/2_map/'
sam='mm2_alz_flnc_hg38_sort.bam'
fasta='/share/workshop/isoseq_workshop/rkuo/1_data/hg38.fa'

# Output prefix: the input file name without ".bam", tagged with the run type.
prefix="tc_nc_flnc_nolde_${sam%.bam}"

# The -d/-a/-z/-sj/-sjt/-log values are the workshop's settings; see the TAMA
# Collapse documentation for what each option controls.
# -b BAM is given because the input is a BAM file rather than SAM.
python "${spath}${pscript}" \
  -s "${fpath}${sam}" \
  -f "${fasta}" \
  -p "${prefix}" \
  -d merge_dup \
  -x "${capflag}" \
  -a 100 \
  -z 100 \
  -sj sj_priority \
  -sjt 10 \
  -log log_off \
  -b BAM
run script
sh run_tama_collapse.sh
make bash script
run_summary_bed.sh
fill in
#!/bin/sh
# run_summary_bed.sh - print gene and transcript counts for a BED file.
#
# Usage: sh run_summary_bed.sh FILE.bed
#
# Column 4 of each tab-separated row is split on ";": the first piece is
# counted as the gene ID and the second as the transcript ID. Rows whose
# column 10 is greater than 1 are counted as multi-exon.
#
# Prints four labelled counts: Genes, Transcripts, Multi-exon Transcripts,
# Multi-exon Genes. Exits non-zero when no file is given or it is unreadable.

# count_ids FILE PART MULTI_ONLY
#   Print how many distinct values the PART-th ";"-separated piece of column 4
#   takes in FILE. When MULTI_ONLY is 1, only rows with column 10 > 1 are used;
#   when it is 0, every row is used.
count_ids() {
  awk -F '\t' -v part="$2" -v multi_only="$3" '
    (!multi_only || $10 > 1) { split($4, id, ";"); print id[part] }
  ' "$1" | sort -u | wc -l
}

# main FILE
#   Validate the argument and print the four labelled counts.
main() {
  if [ "$#" -lt 1 ]; then
    echo "Usage: sh run_summary_bed.sh FILE.bed" >&2
    return 2
  fi
  if [ ! -r "$1" ]; then
    echo "Error: cannot read '$1'" >&2
    return 1
  fi

  echo "Genes"
  count_ids "$1" 1 0

  echo "Transcripts"
  count_ids "$1" 2 0

  echo "Multi-exon Transcripts"
  count_ids "$1" 2 1

  echo "Multi-exon Genes"
  count_ids "$1" 1 1
}

# Last command in the script, so its return status becomes the exit status.
main "$@"
run script
sh run_summary_bed.sh tc_nc_flnc_nolde_mm2_alz_flnc_hg38_sort.bed