Kraken - NBISweden/workshop-genome_assembly GitHub Wiki
Kraken: Taxonomic classification of sequences
Kraken2
Notes:
- Dependencies: Kraken2, Krona
Command:
#!/usr/bin/env bash
# Setup for classifying paired-end reads with Kraken2 and charting them with
# Krona. Selects one read pair per SLURM array task and copies the database
# into $SNIC_TMP/kraken_db before classification.
module load bioinfo-tools Kraken2 Krona

# Thread count handed to kraken2; 8 when SLURM_NPROCS is unset or empty.
CPUS="${SLURM_NPROCS:-8}"
# Index into FILES (bash arrays are 0-based). Falls back to the first entry
# when the script is not run as an array task.
JOB="${SLURM_ARRAY_TASK_ID:-0}"
DATA_DIR=/path/to/reads
# Quote the directory so a path containing spaces is not word-split; the
# trailing glob is left unquoted so it still expands.
FILES=( "$DATA_DIR"/*_R1.fastq.gz )

# Abort when SNIC_TMP is unset: otherwise the path collapses to /kraken_db.
KRAKEN2DB="${SNIC_TMP:?SNIC_TMP is not set; cannot stage the Kraken database}/kraken_db"
mkdir -p "$KRAKEN2DB" || exit 1
# NOTE(review): the fallback path looks like the Kraken v1 database location;
# confirm that it holds a Kraken2-format database, or export KRAKEN_DB.
if ! rsync -a "${KRAKEN_DB:-/sw/data/uppnex/Kraken/latest}"/* "$KRAKEN2DB"; then
  >&2 echo "Failed to copy the Kraken database to $KRAKEN2DB"
  exit 1
fi
#######################################
# Classify one paired-end read set with Kraken2 and build a Krona chart.
# Globals:
#   CPUS      (read) - number of threads passed to kraken2
#   KRAKEN2DB (read) - directory holding the Kraken2 database
# Arguments:
#   $1 - gzip-compressed FASTQ file with read 1 of the pair
#   $2 - gzip-compressed FASTQ file with read 2 of the pair
# Outputs (current directory, <prefix> = basename of $1 cut at its last "_R1"):
#   <prefix>_kraken.tsv, <prefix>_kraken.rpt, <prefix>_kraken_krona.html
# Exits with status 2 when both arguments name the same file.
#######################################
apply_krakenrp () {
  local READ1="$1" # Read 1 of the read pair to be screened
  local READ2="$2" # Read 2 of the read pair to be screened
  if [ "$READ1" == "$READ2" ]; then
    >&2 echo "READ1 and READ2 are the same file. R2 Pattern replacement failed. Please check string substitution pattern lower down"
    exit 2
  fi
  # Declared separately so a basename failure is not masked by 'local'.
  local PREFIX
  PREFIX=$( basename "${READ1%_R1*}" )
  echo "Running Kraken2: $READ1 $READ2"
  kraken2 --threads "$CPUS" --db "$KRAKEN2DB" --report "${PREFIX}_kraken.rpt" --gzip-compressed --paired "$READ1" "$READ2" > "${PREFIX}_kraken.tsv"
  # Krona is fed only columns 2 and 3 of the per-read classification table.
  ktImportTaxonomy <( cut -f2,3 "${PREFIX}_kraken.tsv" ) -o "${PREFIX}_kraken_krona.html"
}
# Pick the read-1 file for this array task. An out-of-range index expands to
# an empty string and an unmatched glob leaves the literal pattern in FILES,
# so stop here rather than hand a non-existent path to the classifier.
FASTQ="${FILES[$JOB]:-}"
if [[ ! -e "$FASTQ" ]]; then
  >&2 echo "No read-1 file for array task '$JOB' (FILES holds ${#FILES[@]} entries)"
  exit 1
fi
# The read-2 name is derived by replacing the first "_R1." with "_R2.".
apply_krakenrp "$FASTQ" "${FASTQ/_R1./_R2.}"
Kraken2
Notes:
- Dependencies: Kraken2, Krona
Command:
#!/usr/bin/env bash
# Setup for classifying assembled sequences with Kraken2 and charting them
# with Krona. Selects one assembly per SLURM array task and copies the
# database into $SNIC_TMP/kraken_db before classification.
module load bioinfo-tools Kraken2 Krona

# Thread count handed to kraken2; 8 when SLURM_NPROCS is unset or empty.
CPUS="${SLURM_NPROCS:-8}"
# Index into FILES (bash arrays are 0-based). Falls back to the first entry
# when the script is not run as an array task.
JOB="${SLURM_ARRAY_TASK_ID:-0}"
FASTA_DIR=/path/to/assemblies
# Quote the directory so a path containing spaces is not word-split; the
# trailing glob is left unquoted so it still expands.
FILES=( "$FASTA_DIR"/*.fasta )

# Abort when SNIC_TMP is unset: otherwise the path collapses to /kraken_db.
KRAKEN2DB="${SNIC_TMP:?SNIC_TMP is not set; cannot stage the Kraken database}/kraken_db"
mkdir -p "$KRAKEN2DB" || exit 1
# NOTE(review): the fallback path looks like the Kraken v1 database location;
# confirm that it holds a Kraken2-format database, or export KRAKEN_DB.
if ! rsync -a "${KRAKEN_DB:-/sw/data/uppnex/Kraken/latest}"/* "$KRAKEN2DB"; then
  >&2 echo "Failed to copy the Kraken database to $KRAKEN2DB"
  exit 1
fi
#######################################
# Classify the sequences of one assembly with Kraken2 and build a Krona chart.
# Globals:
#   CPUS      (read)    - number of threads passed to kraken2
#   KRAKEN2DB (read)    - directory holding the Kraken2 database
#   ASSEMBLY  (written) - set to $1
#   PREFIX    (written) - basename of $1 with a trailing ".fasta" removed
# Arguments:
#   $1 - assembly file; its name is expected to end in .fasta
# Outputs (current directory):
#   <PREFIX>_kraken.tsv, <PREFIX>_kraken.rpt, <PREFIX>_kraken_krona.html
#######################################
apply_krakenasm() {
  ASSEMBLY="$1"
  echo "Running Kraken2: $ASSEMBLY"
  PREFIX=$(basename "$ASSEMBLY" .fasta)

  # All three result files share this stem.
  local stem="${PREFIX}_kraken"

  kraken2 \
    --threads "$CPUS" \
    --db "$KRAKEN2DB" \
    --report "${stem}.rpt" \
    "$ASSEMBLY" \
    > "${stem}.tsv"

  # Krona is fed only columns 2 and 3 of the per-sequence classification table.
  ktImportTaxonomy <(cut -f2,3 "${stem}.tsv") -o "${stem}_krona.html"
}
# Pick the assembly for this array task. An out-of-range index expands to an
# empty string and an unmatched glob leaves the literal pattern in FILES, so
# stop here rather than hand a non-existent path to the classifier.
FASTA="${FILES[$JOB]:-}"
if [[ ! -e "$FASTA" ]]; then
  >&2 echo "No assembly file for array task '$JOB' (FILES holds ${#FILES[@]} entries)"
  exit 1
fi
apply_krakenasm "$FASTA"
Kraken v1.
Notes:
- Users will need 175 GB of RAM for full database
- Dependencies: Kraken, Krona
Command:
#!/usr/bin/env bash
# Setup for classifying paired-end reads with Kraken (v1) and charting them
# with Krona. Selects one read pair per SLURM array task and copies the
# database into $SNIC_TMP/kraken_db before classification.
module load bioinfo-tools Kraken Krona

# Thread count handed to kraken; 8 when SLURM_NPROCS is unset or empty.
CPUS="${SLURM_NPROCS:-8}"
# Index into FILES (bash arrays are 0-based). Falls back to the first entry
# when the script is not run as an array task.
JOB="${SLURM_ARRAY_TASK_ID:-0}"
DATA_DIR=/path/to/reads
# Quote the directory so a path containing spaces is not word-split; the
# trailing glob is left unquoted so it still expands.
FILES=( "$DATA_DIR"/*_R1.fastq.gz )

# Abort when SNIC_TMP is unset: otherwise the path collapses to /kraken_db.
KRAKENDATABASE="${SNIC_TMP:?SNIC_TMP is not set; cannot stage the Kraken database}/kraken_db"
mkdir -p "$KRAKENDATABASE" || exit 1
if ! rsync -a "${KRAKEN_DB:-/sw/data/uppnex/Kraken/latest}"/* "$KRAKENDATABASE"; then
  >&2 echo "Failed to copy the Kraken database to $KRAKENDATABASE"
  exit 1
fi
#######################################
# Classify one paired-end read set with Kraken (v1), summarise the result with
# kraken-report and build a Krona chart.
# Globals:
#   CPUS           (read)    - number of threads passed to kraken
#   KRAKENDATABASE (read)    - directory holding the Kraken database
#   READ1, READ2   (written) - set to $1 and $2
#   PREFIX         (written) - basename of $1 cut at its last "_R1"
# Arguments:
#   $1 - gzip-compressed FASTQ file with read 1 of the pair
#   $2 - gzip-compressed FASTQ file with read 2 of the pair
# Outputs (current directory):
#   <PREFIX>-kraken_classification.tsv / .rpt / .html
# Exits with status 2 when both arguments name the same file.
#######################################
apply_krakenrp() {
  READ1="$1"
  READ2="$2"

  # Identical names mean the caller's _R1 -> _R2 substitution did not match.
  if [[ "$READ1" == "$READ2" ]]; then
    echo "READ1 and READ2 are the same file. R2 Pattern replacement failed. Please check string substitution pattern lower down" >&2
    exit 2
  fi

  PREFIX=$(basename "${READ1%_R1*}")
  echo "Running Kraken: $READ1 $READ2"

  # All three result files share this stem.
  local stem="${PREFIX}-kraken_classification"

  kraken \
    --threads "$CPUS" \
    --db "$KRAKENDATABASE" \
    --fastq-input --gzip-compressed \
    --paired "$READ1" "$READ2" \
    > "${stem}.tsv"

  kraken-report --db "$KRAKENDATABASE" "${stem}.tsv" > "${stem}.rpt"

  # Krona is fed only columns 2 and 3 of the per-read classification table.
  ktImportTaxonomy <(cut -f2,3 "${stem}.tsv") -o "${stem}.html"
}
# Pick the read-1 file for this array task. An out-of-range index expands to
# an empty string and an unmatched glob leaves the literal pattern in FILES,
# so stop here rather than hand a non-existent path to the classifier.
FASTQ="${FILES[$JOB]:-}"
if [[ ! -e "$FASTQ" ]]; then
  >&2 echo "No read-1 file for array task '$JOB' (FILES holds ${#FILES[@]} entries)"
  exit 1
fi
# The read-2 name is derived by replacing the first "_R1." with "_R2.".
apply_krakenrp "$FASTQ" "${FASTQ/_R1./_R2.}"
Kraken: Taxonomic classification of sequences
Notes:
- Users will need 175 GB of RAM for full database
- Dependencies: Kraken, Krona
Command:
#!/usr/bin/env bash
# Setup for classifying assembled sequences with Kraken (v1) and charting
# them with Krona. Selects one assembly per SLURM array task and copies the
# database into $SNIC_TMP/kraken_db before classification.
module load bioinfo-tools Kraken Krona

# Thread count handed to kraken; 8 when SLURM_NPROCS is unset or empty.
CPUS="${SLURM_NPROCS:-8}"
# Index into FILES (bash arrays are 0-based). Falls back to the first entry
# when the script is not run as an array task.
JOB="${SLURM_ARRAY_TASK_ID:-0}"
FASTA_DIR=/path/to/assemblies
# Quote the directory so a path containing spaces is not word-split; the
# trailing glob is left unquoted so it still expands.
FILES=( "$FASTA_DIR"/*.fasta )

# Abort when SNIC_TMP is unset: otherwise the path collapses to /kraken_db.
KRAKENDATABASE="${SNIC_TMP:?SNIC_TMP is not set; cannot stage the Kraken database}/kraken_db"
mkdir -p "$KRAKENDATABASE" || exit 1
if ! rsync -a "${KRAKEN_DB:-/sw/data/uppnex/Kraken/latest}"/* "$KRAKENDATABASE"; then
  >&2 echo "Failed to copy the Kraken database to $KRAKENDATABASE"
  exit 1
fi
#######################################
# Classify the sequences of one assembly with Kraken (v1), summarise the
# result with kraken-report and build a Krona chart.
# Globals:
#   CPUS           (read)    - number of threads passed to kraken
#   KRAKENDATABASE (read)    - directory holding the Kraken database
#   ASSEMBLY       (written) - set to $1
#   PREFIX         (written) - basename of $1 with a trailing ".fasta" removed
# Arguments:
#   $1 - assembly file in FASTA format
# Outputs (current directory):
#   <PREFIX>-kraken_classification.tsv / .rpt / .html
#######################################
apply_krakenasm() {
  ASSEMBLY="$1"
  echo "Running Kraken: $ASSEMBLY"
  PREFIX=$(basename "$ASSEMBLY" .fasta)

  # All three result files share this stem.
  local stem="${PREFIX}-kraken_classification"

  kraken \
    --threads "$CPUS" \
    --db "$KRAKENDATABASE" \
    --fasta-input "$ASSEMBLY" \
    > "${stem}.tsv"

  kraken-report --db "$KRAKENDATABASE" "${stem}.tsv" > "${stem}.rpt"

  # Krona is fed only columns 2 and 3 of the per-sequence classification table.
  ktImportTaxonomy <(cut -f2,3 "${stem}.tsv") -o "${stem}.html"
}
# Pick the assembly for this array task. An out-of-range index expands to an
# empty string and an unmatched glob leaves the literal pattern in FILES, so
# stop here rather than hand a non-existent path to the classifier.
FASTA="${FILES[$JOB]:-}"
if [[ ! -e "$FASTA" ]]; then
  >&2 echo "No assembly file for array task '$JOB' (FILES holds ${#FILES[@]} entries)"
  exit 1
fi
apply_krakenasm "$FASTA"