FBU Queries (PDSW19) - uccross/skyhookdm-ceph GitHub Wiki
- Clone and build skyhook.
- cd build/
- Get the data sets.
# Download the six PDSW19 test data sets (arity-3 and arity-4, three row counts each).
for dataset in \
  dataset_arity3_50000_rows.txt \
  dataset_arity3_500000_rows.txt \
  dataset_arity3_5000000_rows.txt \
  dataset_arity4_42000_rows.txt \
  dataset_arity4_420000_rows.txt \
  dataset_arity4_4200000_rows.txt
do
  wget "https://users.soe.ucsc.edu/~kdahlgren/pdsw19/testdata/${dataset}"
done
- (Re)launch the virtual cluster, if applicable.
# stop.sh presumably shuts down any running virtual cluster; it is run both before
# and after rebuilding the vstart target so the launch below starts from a stopped state.
../src/stop.sh;
make -j12 vstart;
../src/stop.sh;
# Launch the virtual cluster. NOTE(review): -d/-n/-x are presumably vstart's debug,
# new-cluster, and cephx switches — confirm against the Ceph vstart documentation.
../src/vstart.sh -d -n -x;
# Create the tpchflatbuf pool that the writer and query commands on this page use.
bin/rados mkpool tpchflatbuf ;
# Set the pool's "size" to 1 (presumably a single replica, for test clusters only — confirm).
bin/ceph osd pool set tpchflatbuf size 1 ;
- Write a data set into a ceph object. See the following syntax.
| parameter | description |
|---|---|
| --filename | the name of the file containing the pipe-delimited relation to save in ceph |
| --write_type | rows or cols? |
| --debug | yes or no? |
| --schema_datatypes | comma-delimited list of data types. must match table schema |
| --schema_attnames | comma-delimited list of attribute names |
| --table_name | name you want to give the table |
| --nrows | number of rows in the input file |
| --ncols | number of columns in the input file |
| --targetoid | name of the ceph object this is going to occupy |
| --targetpool | name of the ceph pool to use |
| --writeto | ceph or disk? |
| --targetformat | SFT_FLATBUF_UNION_ROW or SFT_FLATBUF_UNION_COL? |
# Each command below writes one arity-3 data set (columns att0:int, att1:float,
# att2:string) into pool tpchflatbuf as object obj.0. All six target the same
# object id, so presumably each write replaces the previous one — run only the
# variant you want to query.
# NOTE(review): the size suffixes in --table_name (1mb / 100mb) do not scale with
# the row counts; they are kept as originally published — confirm intended names.

# FBU_Rows arity-3, 50,000 rows (table arity3_1mb) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_50000_rows.txt --write_type rows --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_1mb --nrows 50000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --targetformat SFT_FLATBUF_UNION_ROW ;
# FBU_Rows arity-3, 500,000 rows (table arity3_100mb) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_500000_rows.txt --write_type rows --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_100mb --nrows 500000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --targetformat SFT_FLATBUF_UNION_ROW ;
# FBU_Rows arity-3, 5,000,000 rows (table arity3_100mb) => obj.0
# Uses the 5,000,000-row file and matching --nrows (previously a duplicate of the 500,000-row command).
bin/fbwriter_fbu --filename dataset_arity3_5000000_rows.txt --write_type rows --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_100mb --nrows 5000000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --targetformat SFT_FLATBUF_UNION_ROW ;
# FBU_Cols arity-3, 50,000 rows (table arity3_1mb) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_50000_rows.txt --write_type cols --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_1mb --nrows 50000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --cols_per_fb 1 --targetformat SFT_FLATBUF_UNION_COL ;
# FBU_Cols arity-3, 500,000 rows (table arity3_1mb) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_500000_rows.txt --write_type cols --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_1mb --nrows 500000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --cols_per_fb 1 --targetformat SFT_FLATBUF_UNION_COL ;
# FBU_Cols arity-3, 5,000,000 rows (table arity3_1mb) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_5000000_rows.txt --write_type cols --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_1mb --nrows 5000000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --cols_per_fb 1 --targetformat SFT_FLATBUF_UNION_COL ;
- Run your queries. Note that Skyhook currently only queries objects whose names start with the "obj." prefix (e.g. obj.0).
# Example queries against the single object (obj.0) in pool tpchflatbuf.
# NOTE(review): the --data-schema string lists one entry per column (ATT0, ATT1,
# ATT2); the meaning of the numeric fields is not documented here — confirm
# against the Skyhook schema format.

# Select everything.
bin/run-query --num-objs 1 --pool tpchflatbuf --wthreads 1 --qdepth 10 --query flatbuf --select "*" ;
# Filter on att0 and att1 ("lt" presumably means less-than) and project all three columns.
# Only att0..att2 exist in the arity-3 schema, so att3 is not projected.
bin/run-query --num-objs 1 --pool tpchflatbuf --wthreads 1 --qdepth 10 --query flatbuf --select "att0,lt,25;att1,lt,25.0;" --project-cols att0,att1,att2 --data-schema "0 8 0 0 ATT0 ; 1 12 0 0 ATT1 ; 2 15 0 0 ATT2 ;" ;
# Same filter, projecting only att0 and att2.
bin/run-query --num-objs 1 --pool tpchflatbuf --wthreads 1 --qdepth 10 --query flatbuf --select "att0,lt,25;att1,lt,25.0;" --project-cols att0,att2 --data-schema "0 8 0 0 ATT0 ; 1 12 0 0 ATT1 ; 2 15 0 0 ATT2 ;" ;
# Aggregate query (presumably the sum of att0 — confirm the predicate syntax).
bin/run-query --num-objs 1 --pool tpchflatbuf --wthreads 1 --qdepth 10 --query flatbuf --select-preds ";att0,sum,0;" --table-name "atable" --data-schema "0 8 0 0 ATT0 ; 1 12 0 0 ATT1 ; 2 15 0 0 ATT2 ;" ;