Run Eukfinder for Long reads or MAGs - dzhao2019/Eukfinder-Test GitHub Wiki
Eukfinder long_seqs [-h] -l LONG_SEQS -o OUT_NAME --mhlen MHLEN --cdb
CDB -n NUMBER_OF_THREADS -z NUMBER_OF_CHUNKS -t
TAXONOMY_UPDATE -p PLAST_DATABASE -m PLAST_ID_MAP
-a ACC2TAX_DATABASE -e E_VALUE --pid PID --cov COV
-h, --help show this help message and exit
Description
-l LONG_SEQS, --long-seqs LONG_SEQS long sequences file
-o OUT_NAME, --out_name OUT_NAME out name
--mhlen MHLEN, --min-hit-length MHLEN minimum hit length
--cdb CDB, --centrifuge-database CDB path to centrifuge database
-n NUMBER_OF_THREADS, --number-of-threads NUMBER_OF_THREADS Number of threads
-z NUMBER_OF_CHUNKS, --number-of-chunks NUMBER_OF_CHUNKS Number of chunks to split the input file into
-t TAXONOMY_UPDATE, --taxonomy-update TAXONOMY_UPDATE Set to True the first time the program is used; otherwise set to False.
-p PLAST_DATABASE, --plast-database PLAST_DATABASE path to plast database
-m PLAST_ID_MAP, --plast-id-map PLAST_ID_MAP path to taxonomy map for plast database
-a ACC2TAX_DATABASE, --acc2tax-database ACC2TAX_DATABASE path to acc2tax database
-e E_VALUE, --e-value E_VALUE E-value threshold for plast searches
--pid PID, --percent_id PID percentage identity for plast searches
--cov COV, --coverage COV percentage coverage for plast searches
#!/bin/bash
#
# Example Grid Engine job script: run Eukfinder in long_seqs mode on a set of
# long reads / contigs / MAGs.
#
# Edit the input, prefix and database variables below before submitting.
# Use 'eukfinder.py long_seqs -h' for the full help message.
#
# Scheduler directives: run under bash, start in the submission directory,
# and request 20 slots in the 'threaded' parallel environment.
#$ -S /bin/bash
#$ -cwd
#$ -pe threaded 20

# Load the system-wide shell environment.
. /etc/profile

# Input sequences and the name used for the output.
input=your-contings.fasta
prefix=your_outname

# path to databases
plastdb=PlastDB_Jun2020.fasta.nal
plastmap=PlastDB_Jun2020_map.txt
centrifuge=Centrifuge_NewDB_Sept2020
acc2tax=Acc2Tax_Feb122021/

# Use exactly as many threads as the scheduler granted. Grid Engine exports
# NSLOTS for parallel-environment jobs; fall back to the 20 slots requested
# above when the script is run outside the scheduler.
threads=${NSLOTS:-20}

# Set '-t True' if it is the first time the program is used.
python eukfinder.py long_seqs -l "$input" -n "$threads" -z 6 --mhlen 100 -t False \
  -p "$plastdb" -m "$plastmap" -a "$acc2tax" -e 0.01 --pid 60 --cov 30 \
  -o "$prefix" --cdb "$centrifuge"