FRCBam - NBISweden/workshop-genome_assembly GitHub Wiki

FRC: Misassembly detection

Notes:

  • Add --mp-sam "${MP_ALIGNMENT}" to the FRC command to also use mate-pair data.
  • The combined output is written to ${PREFIX}_FRC.txt, where ${PREFIX} is the BAM file name without its directory and .bam extension.
  • Dependencies: FRC, GNUplot

Command:

#!/usr/bin/env bash
# Run FRC on one BAM file per array task.
# The /path/to/... values below are placeholders to edit before use.

# Append the FRC install directory to the command search path.
PATH="$PATH:/path/to/FRC"
# Taken from SLURM_NPROCS; 1 when that variable is unset or empty.
CPUS="${SLURM_NPROCS:-1}"
# Taken from SLURM_ARRAY_TASK_ID; used further down as the index into FILES.
JOB="${SLURM_ARRAY_TASK_ID:-}"

ESTIMATED_GENOME_SIZE=100000000
DATA_DIR=/path/to/BAMs

# Collect the BAM files. DATA_DIR is quoted so a directory name containing
# spaces or glob characters is neither split nor expanded. nullglob is enabled
# only around the glob so that "no BAM files" yields an empty array instead of
# the literal, unmatched pattern.
shopt -s nullglob
FILES=( "$DATA_DIR"/*.bam )
shopt -u nullglob

#######################################
# Run FRC on a single paired-end alignment.
# Arguments:
#   $1 - paired-end BAM alignment file
#   $2 - estimated genome size
# Outputs:
#   Invokes FRC with --output set to the BAM file name stripped of its
#   directory and .bam suffix.
# Returns:
#   2 when an argument is missing or empty, otherwise the status of
#   basename/FRC.
#######################################
apply_FRC () {
	if [[ -z "${1:-}" || -z "${2:-}" ]]; then
		printf 'usage: apply_FRC <paired-end.bam> <genome-size>\n' >&2
		return 2
	fi
	# Keep the working variables local so repeated calls do not leak
	# state into (or clobber variables of) the calling script.
	local PE_ALIGNMENT="$1"
	local GENOME_SIZE="$2"
	local PREFIX
	# Declared separately from the assignment so a basename failure is
	# not masked by the exit status of 'local'.
	PREFIX=$( basename -- "${PE_ALIGNMENT}" .bam ) || return
	FRC --pe-sam "${PE_ALIGNMENT}" --genome-size "${GENOME_SIZE}" --output "${PREFIX}"
}

# Select the BAM file for this task and analyse it.
# An empty JOB (for example when SLURM_ARRAY_TASK_ID was not set) falls back
# to index 0; anything that is not a plain non-negative integer, or that
# points past the end of FILES, is rejected instead of handing FRC an empty
# path.
TASK_INDEX="${JOB:-0}"
if ! [[ "$TASK_INDEX" =~ ^(0|[1-9][0-9]*)$ ]] || (( TASK_INDEX >= ${#FILES[@]} )); then
	printf 'error: task index "%s" does not select any of the %d BAM file(s) in %s\n' \
		"$TASK_INDEX" "${#FILES[@]}" "${DATA_DIR:-?}" >&2
	exit 1
fi
BAM="${FILES[$TASK_INDEX]}"
# Guards against an unmatched glob having left its literal pattern in FILES.
if [[ ! -e "$BAM" ]]; then
	printf 'error: BAM file not found: %s\n' "$BAM" >&2
	exit 1
fi
apply_FRC "$BAM" "$ESTIMATED_GENOME_SIZE"

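GNUplot can be used to plot the resulting FRC files. The snippet below plots every file matching *alignment_FRC.txt, so it assumes the BAM file names (and therefore ${PREFIX}) end in "alignment"; adjust the pattern if yours do not: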

# Plot the FRC curves of all matching files into one PNG.
# The heredoc delimiter is quoted so the shell passes the gnuplot script
# through verbatim, without attempting any expansion inside it.
gnuplot << 'EOF'
set terminal png size 800,600
set output 'FRC_Curve_all_assemblies.png'
set title "FRC Curve" font ",14"
set key right bottom font ",8"
set autoscale
set ylabel "Approximate Coverage (%)"
set xlabel "Feature Threshold"
# "find ." names the start directory explicitly: POSIX find requires it, and
# omitting it relies on a GNU find extension.
files = system('find . -name "*alignment_FRC.txt"')
# One curve per file found (column 2 against column 1); the file path is
# used as the legend entry.
plot for [data in files] data using 1:2 with lines title data
EOF