Hive Assignment - TechCruncher/SparkCode GitHub Wiki
Hive Assignment
select * from customers where lower(fname) like '%bridget%' and lower(city) like '%kansas%';
sqoop import \ --connect jdbc:mysql://localhost/dualcore \ --username training --password training \ --fields-terminated-by '\t' \ --table suppliers \ --hive-import
CREATE EXTERNAL TABLE employees (emp_id STRING, fname STRING, lname STRING, address STRING, city STRING, state STRING, zipcode STRING, job_title STRING, email STRING, active STRING, salary INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LOCATION '/dualcore/employees';
CREATE TABLE ratings (posted TIMESTAMP, cust_id INT, prod_id INT, rating TINYINT, message STRING);
CREATE TABLE loyalty_program (cust_id INT, fname STRING, lname STRING, email STRING, level STRING, phone MAP<STRING, STRING>, order_ids ARRAY<INT>, order_value STRUCT<min:INT, max:INT, avg:INT, total:INT>) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':';
select phone['HOME'] from loyalty_program where cust_id = "1200866";
select order_ids[2] from loyalty_program where cust_id = "1200866";
select order_value.total from loyalty_program where cust_id = "1200866";
alter table loyalty_program CHANGE level status;
alter table loyalty_program rename to reward_program;
hadoop fs -put $ADIR/data/access.log /dualcore/web_logs
SELECT term, COUNT(term) AS num FROM (SELECT LOWER(REGEXP_EXTRACT(request, '/search\\?phrase=(\\S+)', 1)) AS term FROM web_logs WHERE request REGEXP '/search\\?phrase=') terms GROUP BY term ORDER BY num DESC LIMIT 3;
CREATE TABLE checkout_sessions AS SELECT cookie, ip_address, COUNT(request) AS steps_completed FROM web_logs WHERE request REGEXP '/cart/checkout/step\\d.+' GROUP BY cookie, ip_address;
CREATE TABLE cart_items AS select cookie, REGEXP_EXTRACT(request, '/cart/additem\\?productid=(\\d+)',1) as prod_id from web_logs where request REGEXP '/cart/additem\\?productid=(\\d+)';