Mapping single-end reads with 'bwa mem'
 
#!/bin/bash
BWA="$HOME/src.HTseq/bwa/0.7.10/bwa"
SAM2HIT="$HOME/git/HTseq-toolbox/sam/sam-to-sam_hit.py"
THREADS=12
DB="$WORK/PSEAE.db/bwadb/PSEAE_PA14_genome"
DBNAME=$(basename $DB)
DBNAME=${DBNAME/.fa/}
for FQ in $(ls ../fastq.tx/*fastq)
do
  SAM=$(basename $FQ)
  SAM=${SAM/.called.fastq/_called}
  SAM=$SAM"."$DBNAME".bwa_mem.sam"
  $BWA mem -t $THREADS $DB $FQ > $SAM
  $SAM2HIT $SAM
done
#!/bin/bash
SAM2BEST="$HOME/git/HTseq-toolbox/sam/sam-to-sam_best.py"
for SAM in $(ls *sam_hit)
do
  $SAM2BEST $SAM
done
#!/bin/bash
TLOG="$HOME/git/HTseq-toolbox/sam/sam-to-t_base_log.py"
for SAM in $(ls *sam_hit_best)
do
  $TLOG $SAM
done
- Output t_base_log file format
 
Target	Pos	Count+	Count-	Quadruple	Border	A  T  G  C
#Target: PSEAE_PA14_NC_008463
PSEAE_PA14|NC_008463	0	2	5	7	0	0  7  0  0
PSEAE_PA14|NC_008463	1	2	5	7	0	0  7  0  0
PSEAE_PA14|NC_008463	2	2	5	7	0	0  7  0  0
Calculate gene_tpm with t_base_log and GFF
 
#!/bin/bash
GENE_TPM="$HOME/git/HTseq-toolbox/align/t_base_log+gff-to-gene_tpm.py"
GFF = '/work/PSEAE_net/genome/PSEAE_PA14_genome.gff3.gz'
for LOG in $(ls *log.gz)
do
  OUT=${LOG/.t_base_log.gz/}".gene_tpm"
  echo $OUT
  $GENE_TPM $LOG $GFF > $OUT
done