Bandage - NBISweden/workshop-genome_assembly GitHub Wiki

Bandage: Assembly graph assessment

Notes:

  • View the graph to get an idea of the assembly.
  • Can be used to filter the assembly.

Command:

#!/usr/bin/env bash
# Render one assembly graph as a PNG image with Bandage.
# Written for a Slurm array job: the array task ID is used as a 0-based
# index into the list of GFA files found in GFA_DIR.

# Append the Bandage 0.8.0 install directory to the search path.
PATH="$PATH:/sw/apps/bioinfo/Bandage/0.8.0/milou"
CPUS="${SLURM_NPROCS:-1}"   # not referenced further in this snippet
# Fall back to the first file when not running inside an array job.
JOB="${SLURM_ARRAY_TASK_ID:-0}"

GFA_DIR=/path/to/gfa
# Quote the directory (it may contain spaces); leave the glob unquoted.
FILES=( "$GFA_DIR"/*assembly_graph_with_scaffolds.gfa )

GFA="${FILES[$JOB]}"
# Replace only the trailing .gfa extension; a substitution with
# ${GFA/.gfa/.png} would hit the first ".gfa" anywhere in the path.
PNG="${GFA%.gfa}.png"
Bandage image "$GFA" "$PNG"

Filtering GFA files to make CSV label files for Bandage.

BLAST labels for all segments from a SPAdes GFA:

# Build a Bandage label CSV (one row per graph segment) from a SPAdes GFA
# and a tab-separated BLAST alignment table.
GFA_FILE=spades_k21-55_full.gfa
BLAST_FILE=spades_k21-55_full_blast_alignment.tsv
PREFIX=$(basename "$BLAST_FILE" _blast_alignment.tsv )
# Scratch files for the two intermediate tables; removed again at the end.
ANNOTATION=$(mktemp)
CONTIG_SEGMENT_ID=$(mktemp)

# Get blast annotations - Column 1 and 15, and keep first hit only. (Manipulate as necessary to keep relevant info, e.g. Domain, etc)
# Fields are split on tabs so the sort key matches the field used by join below.
cut -f1,15 "$BLAST_FILE" | sort -t $'\t' -u -k1,1 > "${ANNOTATION}"

# Get Segment IDs and Path IDs from GFA - manipulating a spades style GFA.
# Each P line yields one "segment<TAB>path name" row per listed segment;
# cut then keeps the first six "_"-separated pieces, dropping the trailing
# path index from the path name.
awk '$1 == "P" { split($3,a,","); for (i in a) { print a[i] "\t" $2} }' "$GFA_FILE" | cut -f1-6 -d"_" > "${CONTIG_SEGMENT_ID}"

# Merge data on the contig names, printing also rows that have no blast hit (left join)
printf "Node,Contig Name,Blast species\n" > "${PREFIX}_bandage_labels.csv"
join -t $'\t' -1 2 -2 1 -a 1 -o 1.1,1.2,2.2 \
  <( sort -t $'\t' -k2,2 "${CONTIG_SEGMENT_ID}" ) \
  <( sort -t $'\t' -k1,1 "${ANNOTATION}" ) \
  | tr "\t" "," >> "${PREFIX}_bandage_labels.csv"

# Clean up the scratch files so repeated runs do not litter the temp directory.
rm -f -- "${ANNOTATION}" "${CONTIG_SEGMENT_ID}"

Example from a project (adds species, domain and colour columns):

#!/usr/bin/env bash
# Makes label CSV files for Bandage from GFA files and BLAST results.
# The shebang has to be the very first line of the script to take effect.

# Append the Bandage 0.8.0 install directory to the search path.
PATH="$PATH:/sw/apps/bioinfo/Bandage/0.8.0/milou"
CPUS="${SLURM_NPROCS:-1}"   # not referenced further in this snippet
# Array task ID used as a 0-based index into FILES; defaults to the first
# file when not running inside a Slurm array job.
JOB="${SLURM_ARRAY_TASK_ID:-0}"

GFA_DIR=/path/to/gfa
BLAST_DIR=/path/to/blast_results
# Quote the directory (it may contain spaces); leave the glob unquoted.
FILES=( "$GFA_DIR"/*assembly_graph_with_scaffolds.gfa )

# Write "<graph basename>.blast_labels.csv" into the current directory,
# joining the paths of a GFA with BLAST annotations on the contig name.
# Arguments:
#   $1 - assembly graph (GFA); its P lines supply contig names and segments
#   $2 - tab-separated BLAST table; column 1 is the contig name, columns 15
#        and 16 are written out as Species and Domain
# Output rows: Node,Contig Name,Species,Domain,Colour - one row per segment
# of every path that has a BLAST row (paths without a hit are omitted).
make_gfa_csv () {
	local assembly_graph=$1
	local blast_results=$2
	local prefix out_csv
	prefix=$(basename "$assembly_graph" .gfa)
	out_csv="${prefix}.blast_labels.csv"
	printf '%s\n' "Node,Contig Name,Species,Domain,Colour" > "$out_csv"
	# Left input:  contig name <TAB> segment list, with the trailing path
	#              index (any number of digits) removed from the name.
	# Right input: contig name <TAB> species <TAB> domain, "_pilon" removed.
	# Both are sorted on the tab-delimited first field, as join requires.
	join -t $'\t' -o 1.2,1.1,2.2,2.3 \
		<( awk -F '\t' -v OFS='\t' '$1 == "P" { sub(/_[0-9]+$/, "", $2); print $2, $3 }' "$assembly_graph" \
			| sort -t $'\t' -k1,1 ) \
		<( cut -f1,15,16 "$blast_results" | sort -t"_" -k2n | uniq | sed 's/_pilon//' \
			| sort -t $'\t' -k1,1 ) \
		| awk -F '\t' -v OFS=',' '
			{
				# Colour is picked from the first pattern found anywhere in the row.
				if (/virus/) colour = "2F6D80"
				else if (/Eukaryota/) colour = "2A66B1"
				else if (/phiX/) colour = "E9815C"
				else colour = "57AB57"
				# A path may list several segments; emit one CSV row per segment
				# so the Node column always holds a single node name.
				n = split($1, segment, ",")
				for (i = 1; i <= n; i++) print segment[i], $2, $3, $4, colour
			}' >> "$out_csv"
}

# Pick this task's graph and derive the matching BLAST table from it.
GFA="${FILES[$JOB]}"
# Sample name = the GFA's parent directory name with its "-spades_assembly"
# suffix removed (the suffix is basename's second argument).
BLAST_FILE="$BLAST_DIR/$(basename "$(dirname "$GFA")" -spades_assembly)-polished_spades_assembly_blast_alignment.tsv"
# Quote both paths so each reaches the function as a single argument.
make_gfa_csv "$GFA" "$BLAST_FILE"