Busco - NBISweden/workshop-genome_assembly GitHub Wiki

Busco: Core gene space completion estimate

Notes:

  • Augustus training species are listed at http://bioinf.uni-greifswald.de/augustus/ (e.g. the default for eukaryotes is Drosophila).
  • --long enables optimization mode for Augustus self-training, which may improve results for non-model organisms.

Command:

#!/usr/bin/env bash
#
# Runs BUSCO on one (assembly, lineage) combination per SLURM array task.
#
# Environment read by this script:
#   SLURM_ARRAY_TASK_ID  index of the combination this task should process
#   SLURM_NPROCS         number of threads for BUSCO (defaults to 8)
#   SNIC_TMP             directory the job runs in; results are copied back
#                        to the submission directory afterwards
#   BUSCO_SETUP, BUSCO_LINEAGE_SETS
#                        not defined in this script; presumably exported by
#                        the BUSCO module loaded below (verify on your cluster)

module load bioinfo-tools BUSCO/3.0.2b
CPUS="${SLURM_NPROCS:-8}"
# Abort early with a clear message instead of failing later on an empty index.
JOB="${SLURM_ARRAY_TASK_ID:?SLURM_ARRAY_TASK_ID is not set; submit this script as a SLURM job array}"

# Placeholder: edit to point at the directory holding the *.fasta assemblies.
FASTA_DIR=/path/to/assemblies
# nullglob makes an unmatched pattern expand to nothing rather than to the
# literal string ".../*.fasta", so an empty directory can be detected.
shopt -s nullglob
FILES=( "$FASTA_DIR"/*.fasta )
shopt -u nullglob
if (( ${#FILES[@]} == 0 )); then
	printf 'No .fasta files found in %s\n' "$FASTA_DIR" >&2
	exit 1
fi

# Remember where the job was started so results can be copied back there.
WORKDIR="$PWD"
# Stop if the scratch directory is unset or unusable; otherwise BUSCO would
# silently run in whatever directory the shell happens to be in.
cd "${SNIC_TMP:?SNIC_TMP is not set}" || exit 1
source "$BUSCO_SETUP"
LINEAGES=( "$BUSCO_LINEAGE_SETS/bacteria_odb9" "$BUSCO_LINEAGE_SETS/eukaryota_odb9" )

#######################################
# Run BUSCO in genome mode on one assembly against one lineage set, then
# copy everything whose name contains the output prefix back to WORKDIR.
# Globals:   CPUS (read), WORKDIR (read)
# Arguments: $1 - path to the assembly FASTA file
#            $2 - path to the BUSCO lineage directory
# Returns:   BUSCO's exit status if it failed, otherwise rsync's status
#######################################
apply_busco () {
	local assembly="$1"		# The assembly is the first parameter to this function
	local lineage="$2"		# The lineage is the second parameter to this function
	local prefix
	local busco_status=0
	local sync_status=0

	# Output name: <assembly minus .fasta>_busco-<lineage minus _odb9>-line
	prefix="$(basename "$assembly" .fasta)_busco-$(basename "$lineage" _odb9)-line"

	run_BUSCO.py -i "$assembly" -l "$lineage" -c "$CPUS" -m genome -o "$prefix" || busco_status=$?

	# Copy results back even when BUSCO failed, so partial output and logs
	# remain available for inspection. Only the variable parts are quoted;
	# the surrounding '*' must stay unquoted to act as a glob.
	rsync -av *"${prefix}"* "$WORKDIR/" || sync_status=$?

	if (( busco_status != 0 )); then
		return "$busco_status"
	fi
	return "$sync_status"
}

# Build two parallel arrays holding every (assembly, lineage) combination:
# PAR1[i] is the assembly and PAR2[i] the lineage for combination i.
# Indices start at 0, so the job array must cover 0 .. (files x lineages - 1).
PAR1=()
PAR2=()
for FASTA in "${FILES[@]}"; do
	for LINE in "${LINEAGES[@]}"; do
		PAR1+=("$FASTA")
		PAR2+=("$LINE")
	done
done

# Reject an empty or non-numeric index up front; using it as an array
# subscript would otherwise fail with an obscure shell error.
if ! [[ "$JOB" =~ ^[0-9]+$ ]]; then
	printf 'Invalid job index: %s\n' "${JOB:-<unset>}" >&2
	exit 1
fi

# An index past the last combination yields empty strings for both arrays.
if [ -z "${PAR1[$JOB]}" ] || [ -z "${PAR2[$JOB]}" ]; then
	printf "Missing File and Lineage\n" >&2
	exit 1
fi
apply_busco "${PAR1[$JOB]}" "${PAR2[$JOB]}"