Testing sequila - mwiewior/bdg-sequila GitHub Wiki
Welcome to the bdg-sequila wiki!
# Local testing
```bash
# cleanup: remove previously cached bdg-sequila artifacts from the local Ivy cache
rm -rf ~/.ivy2/cache/org.biodatageeks/bdg-sequila_2.11/*
rm -rf ~/.ivy2/jars/org.biodatageeks_bdg-sequila_2.11*
spark-shell --master=local[2] --driver-memory=4g --packages org.biodatageeks:bdg-sequila_2.11:0.4.1-SNAPSHOT --repositories https://zsibio.ii.pw.edu.pl/nexus/repository/maven-snapshots/ -v
```
```scala
import org.apache.spark.sql.SequilaSession
import org.biodatageeks.utils.SequilaRegister
// Wrap the spark-shell session (`spark`) in a SequilaSession and register it with SeQuiLa.
val ss = SequilaSession(spark)
SequilaRegister.register(ss)

// Input BAM file. The commented-out line is an alternative, smaller test slice.
//val bamPath = "/Users/marek/git/forks/bdg-sequila/src/test/resources/NA12878.slice.bam"
val bamPath = "/Users/marek/data/NA12878.chrom20.ILLUMINA.bwa.CEU.low_coverage.20121211.bam"

// Name of the table created over the input BAM file.
val tableNameBAM = "reads"

// Name and output location of the table produced by CREATE TABLE ... AS SELECT.
val dstTable = "reads_dst"
// No leading whitespace inside the literal: the value is interpolated verbatim into
// OPTIONS(path "..."), so a stray space would become part of the path.
val dstPath = "/Users/marek/data/ctas/*.bam"
// Recreate the source table: drop any earlier definition, then define `tableNameBAM`
// over the BAM file at `bamPath` using the BAM data source.
ss.sql(s"DROP TABLE IF EXISTS $tableNameBAM")
val createReadsSql = s"""
  |CREATE TABLE $tableNameBAM
  |USING org.biodatageeks.datasources.BAM.BAMDataSource
  |OPTIONS(path "$bamPath")
  |
  |""".stripMargin
ss.sql(createReadsSql)
// Recreate the destination table with CREATE TABLE ... AS SELECT, taking the first
// 1000 rows of `tableNameBAM` and writing them to `dstPath`. The statement runs inside
// spark.time so the elapsed time is reported.
ss.sql(s"DROP TABLE IF EXISTS $dstTable")
spark.time {
  val ctasSql = s"""
    |CREATE TABLE $dstTable
    |USING org.biodatageeks.datasources.BAM.BAMDataSource
    |OPTIONS(path "$dstPath") AS SELECT * FROM $tableNameBAM limit 1000
    |
    |""".stripMargin
  ss.sql(ctasSql)
}