Busco - NBISweden/workshop-genome_assembly GitHub Wiki
Busco: Core gene space completion estimate
Notes:
- Training species found here: http://bioinf.uni-greifswald.de/augustus/ (e.g. default for Eukaryote is Drosophila)
- --long enables optimization mode for Augustus self-training. May improve results for non-model organisms.
Command:
#!/usr/bin/env bash
# Setup for one task of a SLURM array job that runs BUSCO on a set of
# assemblies. Defines the globals used by the rest of the script:
#   CPUS     - thread count handed to BUSCO (SLURM_NPROCS, default 8)
#   JOB      - index of this array task (SLURM_ARRAY_TASK_ID; empty if unset)
#   FILES    - every *.fasta file found in FASTA_DIR
#   WORKDIR  - directory the job was started from; results are copied back here
#   LINEAGES - BUSCO lineage datasets to evaluate each assembly against
# NOTE(review): BUSCO_SETUP and BUSCO_LINEAGE_SETS are presumably exported by
# the BUSCO module loaded below - confirm on the target cluster.
module load bioinfo-tools BUSCO/3.0.2b

CPUS="${SLURM_NPROCS:-8}"
JOB="${SLURM_ARRAY_TASK_ID:-}"
FASTA_DIR=/path/to/assemblies
# Quote the directory so only the *.fasta part is subject to globbing.
FILES=( "$FASTA_DIR"/*.fasta )
WORKDIR=$PWD

# An empty SNIC_TMP would turn the cd below into "cd to $HOME" (unquoted) or a
# silent no-op (quoted), and BUSCO would then write into the wrong directory.
if [[ -z "${SNIC_TMP:-}" ]]; then
  printf 'SNIC_TMP is not set; refusing to run outside the scratch directory\n' >&2
  exit 1
fi
cd "$SNIC_TMP" || exit 1

source "$BUSCO_SETUP"
LINEAGES=(
  "$BUSCO_LINEAGE_SETS/bacteria_odb9"
  "$BUSCO_LINEAGE_SETS/eukaryota_odb9"
)
#######################################
# Run BUSCO in genome mode on one assembly against one lineage dataset, then
# copy everything in the current directory whose name contains the run prefix
# to WORKDIR.
# Globals:
#   CPUS    (read) - number of threads passed to BUSCO via -c
#   WORKDIR (read) - destination directory for the rsync copy
# Arguments:
#   $1 - path to the assembly FASTA file (a ".fasta" suffix is stripped)
#   $2 - path to the lineage dataset (an "_odb9" suffix is stripped)
# Returns:
#   BUSCO's exit status if BUSCO failed, otherwise rsync's exit status.
#######################################
apply_busco () {
  local assembly="$1"
  local lineage="$2"
  # Declared separately from the assignment so a failing basename is not
  # masked by the exit status of 'local'.
  local prefix
  prefix="$(basename "$assembly" .fasta)_busco-$(basename "$lineage" _odb9)-line"
  local busco_status=0
  local rsync_status=0

  run_BUSCO.py -i "$assembly" -l "$lineage" -c "$CPUS" -m genome -o "$prefix" \
    || busco_status=$?

  # Copy even after a BUSCO failure so that whatever was produced is kept.
  # Only the prefix is quoted: the surrounding '*' must still glob, but
  # whitespace inside the prefix or WORKDIR must not split words.
  rsync -av -- *"${prefix}"* "${WORKDIR}/" || rsync_status=$?

  if (( busco_status != 0 )); then
    return "$busco_status"
  fi
  return "$rsync_status"
}
# Enumerate every (assembly, lineage) combination into two parallel arrays:
# PAR1[i] holds the assembly and PAR2[i] the lineage of combination i, with
# lineages varying fastest. The array task index JOB then selects one pair.
PAR1=()
PAR2=()
for FASTA in "${FILES[@]}"; do
  for LINE in "${LINEAGES[@]}"; do
    PAR1+=("$FASTA")
    PAR2+=("$LINE")
  done
done

# Array subscripts are evaluated arithmetically, so a non-numeric JOB would
# silently resolve to index 0; reject anything but a non-negative integer.
if [[ ! "${JOB:-}" =~ ^[0-9]+$ ]]; then
  printf 'Array task index must be a non-negative integer (got "%s")\n' "${JOB:-}" >&2
  exit 1
fi

# Stop when the task index does not correspond to an existing combination.
if [[ -z "${PAR1[$JOB]}" || -z "${PAR2[$JOB]}" ]]; then
  printf "Missing File and Lineage\n" >&2
  exit 1
fi

apply_busco "${PAR1[$JOB]}" "${PAR2[$JOB]}"