Mapping single-end reads with 'bwa mem'
#!/bin/bash
BWA="$HOME/src.HTseq/bwa/0.7.10/bwa"
SAM2HIT="$HOME/git/HTseq-toolbox/sam/sam-to-sam_hit.py"
THREADS=12
DB="$WORK/PSEAE.db/bwadb/PSEAE_PA14_genome"
DBNAME=$(basename $DB)
DBNAME=${DBNAME/.fa/}
for FQ in $(ls ../fastq.tx/*fastq)
do
SAM=$(basename $FQ)
SAM=${SAM/.called.fastq/_called}
SAM=$SAM"."$DBNAME".bwa_mem.sam"
$BWA mem -t $THREADS $DB $FQ > $SAM
$SAM2HIT $SAM
done
#!/bin/bash
SAM2BEST="$HOME/git/HTseq-toolbox/sam/sam-to-sam_best.py"
for SAM in $(ls *sam_hit)
do
$SAM2BEST $SAM
done
#!/bin/bash
TLOG="$HOME/git/HTseq-toolbox/sam/sam-to-t_base_log.py"
for SAM in $(ls *sam_hit_best)
do
$TLOG $SAM
done
- Output t_base_log file format
Target Pos Count+ Count- Quadruple Border A T G C
#Target: PSEAE_PA14_NC_008463
PSEAE_PA14|NC_008463 0 2 5 7 0 0 7 0 0
PSEAE_PA14|NC_008463 1 2 5 7 0 0 7 0 0
PSEAE_PA14|NC_008463 2 2 5 7 0 0 7 0 0
Calculate gene_tpm with t_base_log and GFF
#!/bin/bash
GENE_TPM="$HOME/git/HTseq-toolbox/align/t_base_log+gff-to-gene_tpm.py"
GFF = '/work/PSEAE_net/genome/PSEAE_PA14_genome.gff3.gz'
for LOG in $(ls *log.gz)
do
OUT=${LOG/.t_base_log.gz/}".gene_tpm"
echo $OUT
$GENE_TPM $LOG $GFF > $OUT
done