NCBI - k821209/pipelines GitHub Wiki

Aspera download

wget http://download.asperasoft.com/download/sw/connect/3.7.2/aspera-connect-3.7.2.141527-linux-64.sh

만약에 SRR304976.sra를 받고 싶다면 절대주소는 다음과 같다.

/sra/sra-instant/reads/ByRun/sra/SRR/SRR304/SRR304976/SRR304976.sra

$ ~/.aspera/connect/bin/ascp  -i /home/k821209/.aspera/connect/etc/asperaweb_id_dsa.openssh -k 1 -T -l20m anonftp@ftp.ncbi.nlm.nih.gov:/sra/sra-instant/reads/ByRun/sra/SRR/SRR304/SRR304976/SRR304976.sra ./

SRA Toolkit: https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software

# Use examples:
$ fastq-dump -X 5 -Z SRR390728
# Prints the first five spots (-X 5) to standard out (-Z). This is a useful starting point for verifying other formatting options before dumping a whole file.
$ fastq-dump -I --split-files SRR390728
$ fastq-dump  --origfmt -I  --split-files --gzip SRR1984571.sra # BWA 쓸때 이거 해줘야 paired end 알아먹음.
# (For the `-I --split-files` commands above:) Produces two fastq files (--split-files) containing ".1" and ".2" read suffixes (-I) for paired-end data.
$ fastq-dump --split-files --fasta 60 SRR390728
# Produces two (--split-files) fasta files (--fasta) with 60 bases per line ("60" included after --fasta).
$ fastq-dump --split-files --aligned -Q 64 SRR390728
# Produces two fastq files (--split-files) that contain only aligned reads (--aligned; note: only for files submitted as aligned data), with a quality offset of 64 (-Q 64). Please see the documentation on vdb-dump if you wish to produce fasta/qual data.

# adapter clip: --clip 옵션을 주면 adapter를 떼어 주는 듯.
$ fastq-dump --clip -Z  -X 5 SRR1004966.sra
@SRR1004966.1 GZIIFEG01CW94K length=190
GGCAAGGTTTATTGCTCCTCTCGTTGTTGTCACTCGCAACGTAGTAGGCAAGAAG..
..
$ fastq-dump -Z  -X 5 SRR1004966.sra
@SRR1004966.1 GZIIFEG01CW94K length=194
GACTGGCAAGGTTTATTGCTCCTCTCGTTGTTGTCACTCGCAACGTAGTAGGCAAGAAG..

parallel -j 3 "python ncbi_download.py {1}" :::: samples
parallel -j 1  "fastq-dump  --origfmt -I  --gzip {1}; rm {1}" ::: *.sra