> 文章列表 > hive实战开发-2(建表、写数、加载数据到hdfs、动态分区、列转行)

hive实战开发-2(建表、写数、加载数据到hdfs、动态分区、列转行)

hive实战开发-2(建表、写数、加载数据到hdfs、动态分区、列转行)

hive实战开发-2

  • 1、建表
  • 2、写数
  • 3、加载数据到hdfs
  • 4、动态分区
  • 5、利用group by + collect_list命令按照mac聚合数据

1、建表

--定义新表
create EXTERNAL table if not exists hdw_psi${g_he_dbpostfix}.psi_nginx_access_log_cnt(
statistics_dt string comment'统计日期'
,data_dt string comment'数据日期'
,d_cnt bigint comment'日调用次数'
,m_cnt bigint comment'月调用次数'
)
partitioned by (pdate string comment'分区')
stored as orc--表结构采用orc
tblproperties('creater'='22033799');

2、写数

insert into hdw_psi_dev.test1(mac_id, start_time, end_time, wind_speed) 
select '001', '07:20', '07:49', '2,3' union all
select '001', '11:25', '12:30', '1,2,3' union all
select '002', '10:35', '11:13', '2' union all
select '002', '13:30', '14:15', '3,4' union all
select '002', '20:45', '21:00', '1,2,3';

3、加载数据到hdfs

--将本地数据加载到hdfs
load data inpath '/user/udhuangj/spark/test_data_less.csv' into table hdw_psi_dev.opn_learn_day partition(pdate='20230314');

4、动态分区

create table hdw_psi_dev.opn_learn_day(statistics_dt              string COMMENT '使用日期 yyyy-MM-dd',mac_id                   string COMMENT 'macid',wifi_type                string COMMENT 'WiFitypeId',warmup_starttime         string COMMENT '预热开始时间 HH:mm',warmup_endtime           string COMMENT '预热结束时间 HH:mm',target_temperature       string COMMENT '目标温度'
) comment '用水明细表测试'
PARTITIONED BY (pdate string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\,' STORED AS TEXTFILE;create table hdw_psi_dev.opn_learn_day_test(statistics_dt              string COMMENT '使用日期 yyyy-MM-dd',mac_id                   string COMMENT 'macid',wifi_type                string COMMENT 'WiFitypeId',warmup_starttime         string COMMENT '预热开始时间 HH:mm',warmup_endtime           string COMMENT '预热结束时间 HH:mm',target_temperature       string COMMENT '目标温度'
) comment '用水明细表测试'
PARTITIONED BY (wifi string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\,' STORED AS TEXTFILE;--按照挑选出来的变量传递给分区
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
insert into hdw_psi_dev.opn_learn_day_test partition(wifi) 
select statistics_dt,mac_id,wifi_type,warmup_starttime,warmup_endtime,target_temperature,wifi_type
from hdw_psi_dev.opn_learn_day
where pdate='20230313';

5、利用group by + collect_list命令按照mac聚合数据

小批量的话不需要concat_ws,若数据量大需要配合加入concat_ws

--小批量
insert overwrite table hdw_psi_dev.opn_learn_day partition(pdate='20230330')
select '20230330' as statistics_dt,t1.mac_id,t1.wifi_type,t1.warmup_starttime,t1.warmup_endtime,t1.target_temperature
from (select mac_id,collect_list(distinct wifi_type) as wifi_type,collect_list(warmup_starttime) as warmup_starttime,collect_list(warmup_endtime) as warmup_endtime,collect_list(target_temperature) as target_temperaturefrom hdw_psi_dev.opn_learn_daywhere pdate = '20230313'group by mac_id) as t1;--大批量
insert overwrite table hdw_psi_dev.opn_learn_day partition(pdate='20230330')
select '20230330' as statistics_dt,t1.mac_id,t1.wifi_type,t1.warmup_starttime,t1.warmup_endtime,t1.target_temperature
from (select mac_id,concat_ws('-', collect_list(distinct wifi_type)) as wifi_type,concat_ws('-', collect_list(warmup_starttime)) as warmup_starttime,concat_ws('-', collect_list(warmup_endtime)) as warmup_endtime,concat_ws('-', collect_list(target_temperature)) as target_temperaturefrom hdw_psi_dev.opn_learn_daywhere pdate = '20230313'group by mac_id) as t1;