大数据常用SQL - xd21303/Notebook GitHub Wiki
大数据常用SQL
TIMESTAMP转换
# 13位时间戳转化为字符串
-- Convert a 13-digit (millisecond) epoch value to a formatted date-time string.
-- The seconds value is cast to BIGINT rather than INT: epoch seconds no longer
-- fit in a 32-bit INT after 2038-01-19, and from_unixtime takes a BIGINT.
from_unixtime(cast(timestamps/1000 as bigint), 'yyyy-MM-dd HH:mm:ss')
to_unix_timestamp('2021-11-30 11:12:55')
# 字符串转化为时间戳(上一行的 to_unix_timestamp 同样是字符串转时间戳,属于本节)
unix_timestamp('2021-11-30 11:12:55') # impala 只支持unix_timestamp
查看表详情
desc extended tablename
日期加减
date_add(start_date, num_days)
length
hive 的 length() 按字符计数,一个中文字符长度为 1;impala 的 length() 按字节计数,一个 UTF-8 中文字符长度为 3
中位数
# hive 精确中位数
percentile(cast(trade_amount as BIGINT), 0.5)
# hive 粗略中位数
percentile_approx(trade_amount, 0.5)
# impala
APPX_MEDIAN(trade_amount)
hive 保留小数点后2位
cast(number as decimal(10, 2))
分区表增加字段
cascade 关键字会把字段变更同步到所有分区(包括历史分区)的元数据。
如果不需要刷新历史分区,就不要加 cascade,使用默认的 RESTRICT 即可(只修改表的元数据)。
-- Append the column use_truck_len to the partitioned table. CASCADE applies the
-- change to the metadata of every existing partition as well; the default,
-- RESTRICT, would change only the table-level metadata.
alter table ads.ads_xf_cargo_looking_for_car_model_predict add columns (use_truck_len string comment '司机车长') cascade;
-- Re-declare use_truck_len with the same name and type so that it is
-- repositioned after d2c_distance, again cascading to existing partitions.
alter table ads.ads_xf_cargo_looking_for_car_model_predict change column use_truck_len use_truck_len string after d2c_distance cascade;
判断星期几
结果 0~6 依次对应星期日到星期六(1920-01-01 是星期四,天数差减 3 再对 7 取模后,星期四恰好得到 4)
pmod(datediff('2022-05-08', '1920-01-01') - 3, 7)
# 判断是0,将其变成7
-- Day of week for a date as 1 = Monday ... 7 = Sunday.
-- 1920-01-01 was a Thursday, so (day offset - 4) mod 7 maps Monday to 0;
-- adding 1 yields 1..7 directly. This replaces the IF() that special-cased
-- Sunday, which compared the integer result with the string '0' and evaluated
-- the same expression twice.
select
    pmod(datediff('2022-05-05', '1920-01-01') - 4, 7) + 1
分割字符串
在hive中,一个中文字符长度为1
在impala中,一个中文字符长度为3
# hive:split 返回数组,下标从 0 开始(0,1,2…)
split(sub_market_id, '_')
# impala:SPLIT_PART 的序号从 1 开始(1,2,3…)
SPLIT_PART(sub_market_id, '_', 2)
json列表变行
lateral view hive_udf.ymm_explode_json_array(driver_list) t AS user_ids
多行变一行
concat_ws('&', collect_set(config_value))
group by
config_key
多个列变多行(列转行)
-- Unpivot example: the values of columns `one` and `two` from a single literal
-- row are joined into a comma-separated string, split back into an array and
-- exploded, producing one output row per value alongside the `no` column.
SELECT
    src.no,
    vals.num
FROM (
    SELECT
        'no'  AS no,
        '111' AS one,
        '222' AS two,
        NULL  AS three  -- present in the sample row but not part of the exploded list
) src
LATERAL VIEW explode(split(concat(src.one, ',', src.two), ',')) vals AS num
正则
REGEXP_EXTRACT((a.`工号`), '[0-9]+', 0)
最大值、最小值
max、min 是聚合函数,针对列:对同一列的多行取最大值/最小值
greatest、least 针对行:对同一行内的多个参数(列)取最大值/最小值
生成 1 到任意整数 N 的连续序列(示例中 N = 5000)
-- Generate the consecutive integers 1..5000, one per row, in ascending order.
-- `digits` holds 0-9; cross-joining five copies enumerates every five-digit
-- combination (0..99999), and the outer filter keeps the requested range.
-- To get a longer sequence, raise the upper bound in the WHERE clause (max 99999).
WITH digits AS (
    -- UNION ALL: the ten literals are already distinct, so no dedup pass is needed.
    SELECT 0 AS digit
    UNION ALL SELECT 1
    UNION ALL SELECT 2
    UNION ALL SELECT 3
    UNION ALL SELECT 4
    UNION ALL SELECT 5
    UNION ALL SELECT 6
    UNION ALL SELECT 7
    UNION ALL SELECT 8
    UNION ALL SELECT 9
)
SELECT chapter
FROM (
    SELECT
        (d4.digit * 10000)
        + (d3.digit * 1000)
        + (d2.digit * 100)
        + (d1.digit * 10)
        + d0.digit AS chapter
    -- Explicit CROSS JOINs make the intended Cartesian product visible.
    FROM digits AS d4
    CROSS JOIN digits AS d3
    CROSS JOIN digits AS d2
    CROSS JOIN digits AS d1
    CROSS JOIN digits AS d0
) t1
WHERE chapter > 0 AND chapter <= 5000
ORDER BY chapter