UC Davis TAMA Tutorial 2 - GenomeRIK/workshop_tutorials GitHub Wiki

Using FLNC for mapping and TAMA Collapse

change directory

  cd 1_data

Make a bash script. This converts the FLNC BAM file into a FASTA file.

  run_flnc_fasta.sh

fill in

  # Convert the FLNC reads from BAM to FASTA with bamtools.
  # Input BAM file name.
  bam='alz.flnc.bam'
  # Build the FASTA name by replacing only a trailing ".bam" with ".fa".
  # (A global s/bam/fa/g would also rewrite "bam" elsewhere in the name, and
  # would leave a name without "bam" unchanged, so the redirect below would
  # overwrite the input.)
  fasta="${bam%.bam}.fa"
  # The converted records are captured from stdout into the FASTA file.
  bamtools convert -format fasta -in "${bam}" > "${fasta}"

run bash script

  sh run_flnc_fasta.sh

change directory

  cd ../2_map

Make a bash script. This is for running Minimap2.

  run_minimap2.sh

fill in

  # Map the FLNC reads to the reference genome with minimap2.
  # Reference genome FASTA.
  ref='/share/workshop/isoseq_workshop/rkuo/1_data/hg38.fa'
  # FLNC reads FASTA (the file written by run_flnc_fasta.sh).
  query='/share/workshop/isoseq_workshop/rkuo/1_data/alz.flnc.fa'
  # Alignments are written here in SAM format.
  outfile='mm2_alz_flnc_hg38.sam'

  # Options are kept in their original order; see the minimap2 manual for
  # the meaning of each flag. -t 8 sets the thread count.
  minimap2 \
    --secondary=no \
    -ax splice \
    -uf \
    -C5 \
    -t 8 \
    "${ref}" "${query}" \
    > "${outfile}"

run script

  sh run_minimap2.sh

Make a bash script. This converts the SAM file to a BAM file.

  run_sam_to_bam.sh

fill in

  # Convert the minimap2 SAM alignments into a BAM file.
  # SAM file to read (the output of run_minimap2.sh).
  file='mm2_alz_flnc_hg38.sam'
  # BAM file to create.
  outfile='mm2_alz_flnc_hg38.bam'
  # -b selects BAM output and -S declares SAM input; the result is taken
  # from stdout.
  samtools view -b -S "${file}" > "${outfile}"

Run the script you just filled in (sh run_sam_to_bam.sh), then make another bash script. This one sorts the BAM file.

  run_sort.sh

fill in

  # Sort the BAM file with samtools.
  # Unsorted BAM to read.
  filename='mm2_alz_flnc_hg38.bam'
  # NOTE: despite its name, "prefix" holds the complete output file name
  # that is handed to -o.
  prefix='mm2_alz_flnc_hg38_sort.bam'
  # Options come before the input file, following the documented
  # "samtools sort [options] [in.bam]" synopsis. With -o after the input,
  # the call only works where the option parser reorders arguments.
  samtools sort -o "${prefix}" "${filename}"

Run the sort script (sh run_sort.sh), then change directory.

  cd ../3_collapse/2_tc_flnc_nolde_nc

Make a bash script. This runs TAMA Collapse on the sorted BAM file.

  run_tama_collapse.sh

fill in

  # Run TAMA Collapse on the sorted FLNC alignments.
  # Directory holding the TAMA scripts (the trailing slash is required
  # because the path and script name are concatenated directly).
  spath='/share/workshop/isoseq_workshop/rkuo/tama/'
  pscript='tama_collapse.py'
  # Value passed to -x.
  capflag='no_cap'
  # Directory holding the mapping output (trailing slash required, as above).
  fpath='/share/workshop/isoseq_workshop/rkuo/2_map/'
  # NOTE: the variable is called "sam" but holds a BAM file name; the
  # command below passes "-b BAM" accordingly.
  sam='mm2_alz_flnc_hg38_sort.bam'
  # Reference genome FASTA.
  fasta='/share/workshop/isoseq_workshop/rkuo/1_data/hg38.fa'
  # Output prefix: the alignment file name without its trailing ".bam",
  # prepended with "tc_nc_flnc_nolde_". Parameter expansion strips the
  # suffix only at the end of the name and needs no extra processes.
  prefix="tc_nc_flnc_nolde_${sam%.bam}"

  # Flags are kept in their original order; see the TAMA documentation for
  # the meaning of each one.
  python "${spath}${pscript}" \
    -s "${fpath}${sam}" \
    -f "${fasta}" \
    -p "${prefix}" \
    -d merge_dup \
    -x "${capflag}" \
    -a 100 \
    -z 100 \
    -sj sj_priority \
    -sjt 10 \
    -log log_off \
    -b BAM

run script

  sh run_tama_collapse.sh

Make a bash script. This prints gene and transcript counts for a TAMA Collapse bed file.

  run_summary_bed.sh

fill in

  # Print summary counts for a bed file.
  #
  # Usage: sh run_summary_bed.sh <file.bed>
  #
  # Column 4 is read as "gene_id;transcript_id" and column 10 as the number
  # of blocks (exons); a row counts as multi-exon when column 10 is > 1.

  # Count the distinct IDs found in column 4 of a tab-separated file.
  #   $1 - file to read
  #   $2 - which ";"-separated part of column 4 to count:
  #        1 (gene ID) or 2 (transcript ID)
  #   $3 - 1 to keep only rows whose column 10 is > 1, 0 to keep every row
  # Prints the count on stdout.
  count_unique_ids() {
    # The file is supplied on stdin so that a name containing "=" is never
    # mistaken by awk for a variable assignment. LC_ALL=C makes sort -u
    # compare byte-wise, so only identical IDs are merged.
    awk -F '\t' -v part="$2" -v multi_only="$3" '
      multi_only == 1 && !($10 > 1) { next }
      { split($4, id, ";"); print id[part] }
    ' < "$1" | LC_ALL=C sort -u | wc -l
  }

  # Print the four labelled counts for the bed file given as $1.
  # Returns 1 (and prints a usage message on stderr) when $1 is empty or
  # not a readable file, instead of waiting for input on stdin.
  summarize_bed() {
    if [ -z "${1:-}" ] || [ ! -r "$1" ]; then
      printf 'Usage: sh run_summary_bed.sh <file.bed>\n' >&2
      return 1
    fi
    echo "Genes"
    count_unique_ids "$1" 1 0
    echo "Transcripts"
    count_unique_ids "$1" 2 0
    echo "Multi-exon Transcripts"
    count_unique_ids "$1" 2 1
    echo "Multi-exon Genes"
    count_unique_ids "$1" 1 1
  }

  # As the last command, its return status becomes the script's exit status.
  summarize_bed "${1:-}"

run script

  sh run_summary_bed.sh tc_nc_flnc_nolde_mm2_alz_flnc_hg38_sort.bed
⚠️ **GitHub.com Fallback** ⚠️