AWS Athena CTAS Queries Examples - isgaur/AWS-BigData-Solutions GitHub Wiki

Athena CTAS Queries Example :

## Example to create a table by copying all columns from a table:

    CREATE TABLE new_table AS 
    SELECT * 
    FROM old_table;

Your SELECT statement can also includes a WHERE clause. In this case, the query selects only those rows from the table that satisfy the WHERE clause:

     CREATE TABLE new_table AS 
     SELECT * 
     FROM old_table 
     WHERE condition;

     
## Selective Columns :

      CREATE TABLE new_table AS 
      SELECT column_1, column_2, ... column_n 
      FROM old_table;


## Selective Columns from Multiple Table:

      CREATE TABLE new_table AS
      SELECT column_1, column_2, ... column_n 
      FROM old_table_1, old_table_2, ... old_table_n;

## Empty Table with Same Schema of other table:

      CREATE TABLE new_table 
      AS SELECT * 
      FROM old_table
      WITH NO DATA; 


## Specify format and compression format:

      CREATE TABLE new_table
      WITH (
            format = 'Parquet',
            parquet_compression = 'SNAPPY')
      AS SELECT *
      FROM old_table;


## With ORC :

    CREATE TABLE new_table
    WITH (format = 'ORC',
          orc_compression = 'SNAPPY')
    AS SELECT *
    FROM old_table ;

## Result in different Format and location:

    CREATE TABLE my_orc_ctas_table
    WITH (
      external_location = 's3://my_athena_results/my_orc_stas_table/',
      format = 'ORC')
    AS SELECT * 
    FROM old_table;

## Create Unpartitioned Table: 

    CREATE TABLE ctas_csv_unpartitioned 
    WITH (
     format = 'TEXTFILE', 
     external_location = 's3://my_athena_results/ctas_csv_unpartitioned/') 
    AS SELECT key1, name1, address1, comment1
    FROM table1;


 ## Un-partitioned Parquet Format with Default location :

      CREATE TABLE ctas_parquet_unpartitioned 
      WITH (format = 'PARQUET') 
      AS SELECT key1, name1, comment1
      FROM table1;

  ## Json Format with DEfault location:

      CREATE TABLE ctas_json_unpartitioned 
      WITH (
       format = 'JSON',  
       external_location = 's3://my_athena_results/ctas_json_unpartitioned/') 
      AS SELECT key1, name1, address1, comment1
      FROM table1;

  ## ORC format:

      CREATE TABLE ctas_orc_unpartitioned 
      WITH (
       format = 'ORC') 
      AS SELECT key1, name1, comment1 
      FROM table1;

  ## Creating Partitioned Table:

       CREATE TABLE ctas_csv_partitioned 
      WITH (
           format = 'TEXTFILE',  
           external_location = 's3://my_athena_results/ctas_csv_partitioned/', 
           partitioned_by = ARRAY['key1']) 
      AS SELECT name1, address1, comment1, key1
      FROM tables1;


    ## Json Partitioned :


            CREATE TABLE ctas_json_partitioned 
    WITH (
         format = 'JSON', 
         external_location = 's3://my_athena_results/ctas_json_partitioned/', 
         partitioned_by = ARRAY['key1']) 
    AS select name1, address1, comment1, key1 
    FROM table1;

    ## Creating buckted and partitioned table:

              CREATE TABLE ctas_avro_bucketed 
      WITH (
            format = 'AVRO', 
            external_location = 's3://my_athena_results/ctas_avro_bucketed/', 
            partitioned_by = ARRAY['nationkey'], 
            bucketed_by = ARRAY['mktsegment'], 
            bucket_count = 3) 
      AS SELECT key1, name1, address1, phone1, acctbal, mktsegment, comment1, nationkey 
      FROM table1;