Testing sequila - mwiewior/bdg-sequila GitHub Wiki

Welcome to the bdg-sequila wiki!

## Local testing

```bash
# Cleanup: remove cached bdg-sequila artifacts so the latest snapshot is downloaded again
rm -rf ~/.ivy2/cache/org.biodatageeks/bdg-sequila_2.11/*
rm -rf ~/.ivy2/jars/org.biodatageeks_bdg-sequila_2.11*
spark-shell --master=local[2] --driver-memory=4g --packages org.biodatageeks:bdg-sequila_2.11:0.4.1-SNAPSHOT --repositories https://zsibio.ii.pw.edu.pl/nexus/repository/maven-snapshots/ -v
```

```scala
import org.apache.spark.sql.SequilaSession
import org.biodatageeks.utils.SequilaRegister
// Wrap the spark-shell's SparkSession (`spark`) in a SequilaSession and pass it to SequilaRegister.
val ss = SequilaSession(spark)
SequilaRegister.register(ss)

// Input BAM file; swap in the commented-out path to run against the small slice
// shipped in the repository's test resources instead.
//val bamPath = "/Users/marek/git/forks/bdg-sequila/src/test/resources/NA12878.slice.bam"
val bamPath = "/Users/marek/data/NA12878.chrom20.ILLUMINA.bwa.CEU.low_coverage.20121211.bam"
val tableNameBAM = "reads"
val dstTable = "reads_dst"
// Must not start with whitespace: the value is interpolated verbatim into OPTIONS(path "..."),
// so a leading blank would become part of the destination path.
val dstPath = "/Users/marek/data/ctas/*.bam"

// (Re)create the source table backed by the input BAM file.
ss.sql(s"DROP TABLE IF EXISTS ${tableNameBAM}")
ss.sql(
  s"""
     |CREATE TABLE ${tableNameBAM}
     |USING org.biodatageeks.datasources.BAM.BAMDataSource
     |OPTIONS(path "${bamPath}")
     |
  """.stripMargin)

// (Re)create the destination table with CTAS from the first 1000 rows of the source table.
// spark.time runs the block and prints the time taken to stdout.
ss.sql(s"DROP TABLE IF EXISTS ${dstTable}")
spark.time {
  ss.sql(
    s"""
       |CREATE TABLE ${dstTable}
       |USING org.biodatageeks.datasources.BAM.BAMDataSource
       |OPTIONS(path "${dstPath}") AS SELECT * FROM ${tableNameBAM} limit 1000
       |
    """.stripMargin)
}