> 文章列表 > 大数据项目实战之数据仓库:电商数据仓库系统——第8章 数仓开发之DIM层

大数据项目实战之数据仓库:电商数据仓库系统——第8章 数仓开发之DIM层

大数据项目实战之数据仓库:电商数据仓库系统——第8章 数仓开发之DIM层

文章目录

  • 第8章 数仓开发之DIM层
    • 8.1 商品维度
    • 8.2 优惠券维度表
    • 8.3 活动维度表
    • 8.4 地区维度表
    • 8.5 日期维度表
    • 8.6 用户维度表
    • 8.7 数据装载脚本
      • 8.7.1 首日装载脚本
      • 8.7.2 每日装载脚本

第8章 数仓开发之DIM层

DIM层设计要点:

(1)DIM层的设计依据是维度建模理论,该层存储维度模型的维度表。

(2)DIM层的数据存储格式为orc列式存储+snappy压缩。

(3)DIM层表名的命名规范为dim_表名_全量表或者拉链表标识(full/zip)

8.1 商品维度表

1)建表语句

DROP TABLE IF EXISTS dim_sku_full;
CREATE EXTERNAL TABLE dim_sku_full
(`id`                   STRING COMMENT 'sku_id',`price`                DECIMAL(16, 2) COMMENT '商品价格',`sku_name`             STRING COMMENT '商品名称',`sku_desc`             STRING COMMENT '商品描述',`weight`               DECIMAL(16, 2) COMMENT '重量',`is_sale`              BOOLEAN COMMENT '是否在售',`spu_id`               STRING COMMENT 'spu编号',`spu_name`             STRING COMMENT 'spu名称',`category3_id`         STRING COMMENT '三级分类id',`category3_name`       STRING COMMENT '三级分类名称',`category2_id`         STRING COMMENT '二级分类id',`category2_name`       STRING COMMENT '二级分类名称',`category1_id`         STRING COMMENT '一级分类id',`category1_name`       STRING COMMENT '一级分类名称',`tm_id`                STRING COMMENT '品牌id',`tm_name`              STRING COMMENT '品牌名称',`sku_attr_values`      ARRAY<STRUCT<attr_id :STRING,value_id :STRING,attr_name :STRING,value_name:STRING>> COMMENT '平台属性',`sku_sale_attr_values` ARRAY<STRUCT<sale_attr_id :STRING,sale_attr_value_id :STRING,sale_attr_name :STRING,sale_attr_value_name:STRING>> COMMENT '销售属性',`create_time`          STRING COMMENT '创建时间'
) COMMENT '商品维度表'PARTITIONED BY (`dt` STRING)STORED AS ORCLOCATION '/warehouse/gmall/dim/dim_sku_full/'TBLPROPERTIES ('orc.compress' = 'snappy');

2)数据装载

with
sku as
(selectid,price,sku_name,sku_desc,weight,is_sale,spu_id,category3_id,tm_id,create_timefrom ods_sku_info_fullwhere dt='2020-06-14'
),
spu as
(selectid,spu_namefrom ods_spu_info_fullwhere dt='2020-06-14'
),
c3 as
(selectid,name,category2_idfrom ods_base_category3_fullwhere dt='2020-06-14'
),
c2 as
(selectid,name,category1_idfrom ods_base_category2_fullwhere dt='2020-06-14'
),
c1 as
(selectid,namefrom ods_base_category1_fullwhere dt='2020-06-14'
),
tm as
(selectid,tm_namefrom ods_base_trademark_fullwhere dt='2020-06-14'
),
attr as
(selectsku_id,collect_set(named_struct('attr_id',attr_id,'value_id',value_id,'attr_name',attr_name,'value_name',value_name)) attrsfrom ods_sku_attr_value_fullwhere dt='2020-06-14'group by sku_id
),
sale_attr as
(selectsku_id,collect_set(named_struct('sale_attr_id',sale_attr_id,'sale_attr_value_id',sale_attr_value_id,'sale_attr_name',sale_attr_name,'sale_attr_value_name',sale_attr_value_name)) sale_attrsfrom ods_sku_sale_attr_value_fullwhere dt='2020-06-14'group by sku_id
)
insert overwrite table dim_sku_full partition(dt='2020-06-14')
selectsku.id,sku.price,sku.sku_name,sku.sku_desc,sku.weight,sku.is_sale,sku.spu_id,spu.spu_name,sku.category3_id,c3.name,c3.category2_id,c2.name,c2.category1_id,c1.name,sku.tm_id,tm.tm_name,attr.attrs,sale_attr.sale_attrs,sku.create_time
from sku
left join spu on sku.spu_id=spu.id
left join c3 on sku.category3_id=c3.id
left join c2 on c3.category2_id=c2.id
left join c1 on c2.category1_id=c1.id
left join tm on sku.tm_id=tm.id
left join attr on sku.id=attr.sku_id
left join sale_attr on sku.id=sale_attr.sku_id;

8.2 优惠券维度表

1)建表语句

DROP TABLE IF EXISTS dim_coupon_full;
CREATE EXTERNAL TABLE dim_coupon_full
(`id`               STRING COMMENT '购物券编号',`coupon_name`      STRING COMMENT '购物券名称',`coupon_type_code` STRING COMMENT '购物券类型编码',`coupon_type_name` STRING COMMENT '购物券类型名称',`condition_amount` DECIMAL(16, 2) COMMENT '满额数',`condition_num`    BIGINT COMMENT '满件数',`activity_id`      STRING COMMENT '活动编号',`benefit_amount`   DECIMAL(16, 2) COMMENT '减金额',`benefit_discount` DECIMAL(16, 2) COMMENT '折扣',`benefit_rule`     STRING COMMENT '优惠规则:满元*减*元,满*件打*折',`create_time`      STRING COMMENT '创建时间',`range_type_code`  STRING COMMENT '优惠范围类型编码',`range_type_name`  STRING COMMENT '优惠范围类型名称',`limit_num`        BIGINT COMMENT '最多领取次数',`taken_count`      BIGINT COMMENT '已领取次数',`start_time`       STRING COMMENT '可以领取的开始日期',`end_time`         STRING COMMENT '可以领取的结束日期',`operate_time`     STRING COMMENT '修改时间',`expire_time`      STRING COMMENT '过期时间'
) COMMENT '优惠券维度表'PARTITIONED BY (`dt` STRING)STORED AS ORCLOCATION '/warehouse/gmall/dim/dim_coupon_full/'TBLPROPERTIES ('orc.compress' = 'snappy');

2)数据装载

insert overwrite table dim_coupon_full partition(dt='2020-06-14')
selectid,coupon_name,coupon_type,coupon_dic.dic_name,condition_amount,condition_num,activity_id,benefit_amount,benefit_discount,case coupon_typewhen '3201' then concat('满',condition_amount,'元减',benefit_amount,'元')when '3202' then concat('满',condition_num,'件打',10*(1-benefit_discount),'折')when '3203' then concat('减',benefit_amount,'元')end benefit_rule,create_time,range_type,range_dic.dic_name,limit_num,taken_count,start_time,end_time,operate_time,expire_time
from
(selectid,coupon_name,coupon_type,condition_amount,condition_num,activity_id,benefit_amount,benefit_discount,create_time,range_type,limit_num,taken_count,start_time,end_time,operate_time,expire_timefrom ods_coupon_info_fullwhere dt='2020-06-14'
)ci
left join
(selectdic_code,dic_namefrom ods_base_dic_fullwhere dt='2020-06-14'and parent_code='32'
)coupon_dic
on ci.coupon_type=coupon_dic.dic_code
left join
(selectdic_code,dic_namefrom ods_base_dic_fullwhere dt='2020-06-14'and parent_code='33'
)range_dic
on ci.range_type=range_dic.dic_code;

8.3 活动维度表

1)建表语句

DROP TABLE IF EXISTS dim_activity_full;
CREATE EXTERNAL TABLE dim_activity_full
(`activity_rule_id`   STRING COMMENT '活动规则ID',`activity_id`        STRING COMMENT '活动ID',`activity_name`      STRING COMMENT '活动名称',`activity_type_code` STRING COMMENT '活动类型编码',`activity_type_name` STRING COMMENT '活动类型名称',`activity_desc`      STRING COMMENT '活动描述',`start_time`         STRING COMMENT '开始时间',`end_time`           STRING COMMENT '结束时间',`create_time`        STRING COMMENT '创建时间',`condition_amount`   DECIMAL(16, 2) COMMENT '满减金额',`condition_num`      BIGINT COMMENT '满减件数',`benefit_amount`     DECIMAL(16, 2) COMMENT '优惠金额',`benefit_discount`   DECIMAL(16, 2) COMMENT '优惠折扣',`benefit_rule`       STRING COMMENT '优惠规则',`benefit_level`      STRING COMMENT '优惠级别'
) COMMENT '活动信息表'PARTITIONED BY (`dt` STRING)STORED AS ORCLOCATION '/warehouse/gmall/dim/dim_activity_full/'TBLPROPERTIES ('orc.compress' = 'snappy');

2)数据装载

insert overwrite table dim_activity_full partition(dt='2020-06-14')
selectrule.id,info.id,activity_name,rule.activity_type,dic.dic_name,activity_desc,start_time,end_time,create_time,condition_amount,condition_num,benefit_amount,benefit_discount,case rule.activity_typewhen '3101' then concat('满',condition_amount,'元减',benefit_amount,'元')when '3102' then concat('满',condition_num,'件打',10*(1-benefit_discount),'折')when '3103' then concat('打',10*(1-benefit_discount),'折')end benefit_rule,benefit_level
from
(selectid,activity_id,activity_type,condition_amount,condition_num,benefit_amount,benefit_discount,benefit_levelfrom ods_activity_rule_fullwhere dt='2020-06-14'
)rule
left join
(selectid,activity_name,activity_type,activity_desc,start_time,end_time,create_timefrom ods_activity_info_fullwhere dt='2020-06-14'
)info
on rule.activity_id=info.id
left join
(selectdic_code,dic_namefrom ods_base_dic_fullwhere dt='2020-06-14'and parent_code='31'
)dic
on rule.activity_type=dic.dic_code;

8.4 地区维度表

1)建表语句

DROP TABLE IF EXISTS dim_province_full;
CREATE EXTERNAL TABLE dim_province_full
(`id`            STRING COMMENT 'id',`province_name` STRING COMMENT '省市名称',`area_code`     STRING COMMENT '地区编码',`iso_code`      STRING COMMENT '旧版ISO-3166-2编码,供可视化使用',`iso_3166_2`    STRING COMMENT '新版IOS-3166-2编码,供可视化使用',`region_id`     STRING COMMENT '地区id',`region_name`   STRING COMMENT '地区名称'
) COMMENT '地区维度表'PARTITIONED BY (`dt` STRING)STORED AS ORCLOCATION '/warehouse/gmall/dim/dim_province_full/'TBLPROPERTIES ('orc.compress' = 'snappy');

2)数据装载

insert overwrite table dim_province_full partition(dt='2020-06-14')
selectprovince.id,province.name,province.area_code,province.iso_code,province.iso_3166_2,region_id,region_name
from
(selectid,name,region_id,area_code,iso_code,iso_3166_2from ods_base_province_fullwhere dt='2020-06-14'
)province
left join
(selectid,region_namefrom ods_base_region_fullwhere dt='2020-06-14'
)region
on province.region_id=region.id;

8.5 日期维度表

1)建表语句

DROP TABLE IF EXISTS dim_date;
CREATE EXTERNAL TABLE dim_date
(`date_id`    STRING COMMENT '日期ID',`week_id`    STRING COMMENT '周ID,一年中的第几周',`week_day`   STRING COMMENT '周几',`day`        STRING COMMENT '每月的第几天',`month`      STRING COMMENT '一年中的第几月',`quarter`    STRING COMMENT '一年中的第几季度',`year`       STRING COMMENT '年份',`is_workday` STRING COMMENT '是否是工作日',`holiday_id` STRING COMMENT '节假日'
) COMMENT '时间维度表'STORED AS ORCLOCATION '/warehouse/gmall/dim/dim_date/'TBLPROPERTIES ('orc.compress' = 'snappy');

2)数据装载

通常情况下,时间维度表的数据并不是来自于业务系统,而是手动写入,并且由于时间维度表数据的可预见性,无须每日导入,一般可一次性导入一年的数据。

(1)创建临时表

DROP TABLE IF EXISTS tmp_dim_date_info;
CREATE EXTERNAL TABLE tmp_dim_date_info (`date_id` STRING COMMENT '日',`week_id` STRING COMMENT '周ID',`week_day` STRING COMMENT '周几',`day` STRING COMMENT '每月的第几天',`month` STRING COMMENT '第几月',`quarter` STRING COMMENT '第几季度',`year` STRING COMMENT '年',`is_workday` STRING COMMENT '是否是工作日',`holiday_id` STRING COMMENT '节假日'
) COMMENT '时间维度表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t'
LOCATION '/warehouse/gmall/tmp/tmp_dim_date_info/';

(2)将数据文件上传到HFDS上临时表路径/warehouse/gmall/tmp/tmp_dim_date_info

date_info.txt

(3)执行以下语句将其导入时间维度表

insert overwrite table dim_date select * from tmp_dim_date_info;

(4)检查数据是否导入成功

select * from dim_date;

8.6 用户维度表

1)建表语句

DROP TABLE IF EXISTS dim_user_zip;
CREATE EXTERNAL TABLE dim_user_zip
(`id`           STRING COMMENT '用户id',`login_name`   STRING COMMENT '用户名称',`nick_name`    STRING COMMENT '用户昵称',`name`         STRING COMMENT '用户姓名',`phone_num`    STRING COMMENT '手机号码',`email`        STRING COMMENT '邮箱',`user_level`   STRING COMMENT '用户等级',`birthday`     STRING COMMENT '生日',`gender`       STRING COMMENT '性别',`create_time`  STRING COMMENT '创建时间',`operate_time` STRING COMMENT '操作时间',`start_date`   STRING COMMENT '开始日期',`end_date`     STRING COMMENT '结束日期'
) COMMENT '用户表'PARTITIONED BY (`dt` STRING)STORED AS ORCLOCATION '/warehouse/gmall/dim/dim_user_zip/'TBLPROPERTIES ('orc.compress' = 'snappy');

2)分区规划

用户拉链表分区

大数据项目实战之数据仓库:电商数据仓库系统——第8章 数仓开发之DIM层

3)数据装载

(1)数据装载过程

拉链表装在过程

大数据项目实战之数据仓库:电商数据仓库系统——第8章 数仓开发之DIM层

(2)数据流向

用户维度表

大数据项目实战之数据仓库:电商数据仓库系统——第8章 数仓开发之DIM层

(3)首日装载

insert overwrite table dim_user_zip partition (dt='9999-12-31')
selectdata.id,data.login_name,data.nick_name,md5(data.name),md5(data.phone_num),md5(data.email),data.user_level,data.birthday,data.gender,data.create_time,data.operate_time,'2020-06-14' start_date,'9999-12-31' end_date
from ods_user_info_inc
where dt='2020-06-14'
and type='bootstrap-insert';

(4)每日装载

装载思路

大数据项目实战之数据仓库:电商数据仓库系统——第8章 数仓开发之DIM层

装载语句

with
tmp as
(selectold.id old_id,old.login_name old_login_name,old.nick_name old_nick_name,old.name old_name,old.phone_num old_phone_num,old.email old_email,old.user_level old_user_level,old.birthday old_birthday,old.gender old_gender,old.create_time old_create_time,old.operate_time old_operate_time,old.start_date old_start_date,old.end_date old_end_date,new.id new_id,new.login_name new_login_name,new.nick_name new_nick_name,new.name new_name,new.phone_num new_phone_num,new.email new_email,new.user_level new_user_level,new.birthday new_birthday,new.gender new_gender,new.create_time new_create_time,new.operate_time new_operate_time,new.start_date new_start_date,new.end_date new_end_datefrom(selectid,login_name,nick_name,name,phone_num,email,user_level,birthday,gender,create_time,operate_time,start_date,end_datefrom dim_user_zipwhere dt='9999-12-31')oldfull outer join(selectid,login_name,nick_name,md5(name) name,md5(phone_num) phone_num,md5(email) email,user_level,birthday,gender,create_time,operate_time,'2020-06-15' start_date,'9999-12-31' end_datefrom(selectdata.id,data.login_name,data.nick_name,data.name,data.phone_num,data.email,data.user_level,data.birthday,data.gender,data.create_time,data.operate_time,row_number() over (partition by data.id order by ts desc) rnfrom ods_user_info_incwhere dt='2020-06-15')t1where rn=1)newon old.id=new.id
)
insert overwrite table dim_user_zip partition(dt)
selectif(new_id is not null,new_id,old_id),if(new_id is not null,new_login_name,old_login_name),if(new_id is not null,new_nick_name,old_nick_name),if(new_id is not null,new_name,old_name),if(new_id is not null,new_phone_num,old_phone_num),if(new_id is not null,new_email,old_email),if(new_id is not null,new_user_level,old_user_level),if(new_id is not null,new_birthday,old_birthday),if(new_id is not null,new_gender,old_gender),if(new_id is not null,new_create_time,old_create_time),if(new_id is not null,new_operate_time,old_operate_time),if(new_id is not null,new_start_date,old_start_date),if(new_id is not null,new_end_date,old_end_date),if(new_id is not null,new_end_date,old_end_date) dt
from tmp
union all
selectold_id,old_login_name,old_nick_name,old_name,old_phone_num,old_email,old_user_level,old_birthday,old_gender,old_create_time,old_operate_time,old_start_date,cast(date_add('2020-06-15',-1) as string) old_end_date,cast(date_add('2020-06-15',-1) as string) dt
from tmp
where old_id is not null
and new_id is not null;
insert overwrite table dim_user_zip partition(dt)
selectid,login_name,nick_name,name,phone_num,email,user_level,birthday,gender,create_time,operate_time,start_date,if(rk=2, date_sub('2020-06-15', 1), end_date) end_date,if(rk=1, '9999-12-31', date_sub('2020-06-15', 1))
from
(selectid,login_name,nick_name,name,phone_num,email,user_level,birthday,gender,create_time,operate_time,start_date,end_date,rank() over (partition by id order by start_date desc) rkfrom(selectid,login_name,nick_name,name,phone_num,email,user_level,birthday,gender,create_time,operate_time,start_date,end_datefrom dim_user_zipwhere dt='9999-12-31'union allselectid,login_name,nick_name,md5(name) name,md5(phone_num) phone_num,md5(email) email,user_level,birthday,gender,create_time,operate_time,'2020-06-15' start_date,'9999-12-31' end_datefrom(selectdata.id,data.login_name,data.nick_name,data.name,data.phone_num,data.email,data.user_level,data.birthday,data.gender,data.create_time,data.operate_time,row_number() over (partition by data.id order by ts desc) rnfrom ods_user_info_incwhere dt='2020-06-15')t1where rn=1)t2
)t3

8.7 数据装载脚本

8.7.1 首日装载脚本

(1)在hadoop102的/home/atguigu/bin目录下创建ods_to_dim_init.sh

[atguigu@hadoop102 bin]$ vim ods_to_dim_init.sh

(2)编写如下内容

#!/bin/bashAPP=gmallif [ -n "$2" ] ;thendo_date=$2
else echo "请传入日期参数"exit
fi dim_user_zip="
insert overwrite table ${APP}.dim_user_zip partition (dt='9999-12-31')
selectdata.id,data.login_name,data.nick_name,md5(data.name),md5(data.phone_num),md5(data.email),data.user_level,data.birthday,data.gender,data.create_time,data.operate_time,'$do_date' start_date,'9999-12-31' end_date
from ${APP}.ods_user_info_inc
where dt='$do_date'
and type='bootstrap-insert';
"dim_sku_full="
with
sku as
(selectid,price,sku_name,sku_desc,weight,is_sale,spu_id,category3_id,tm_id,create_timefrom ${APP}.ods_sku_info_fullwhere dt='$do_date'
),
spu as
(selectid,spu_namefrom ${APP}.ods_spu_info_fullwhere dt='$do_date'
),
c3 as
(selectid,name,category2_idfrom ${APP}.ods_base_category3_fullwhere dt='$do_date'
),
c2 as
(selectid,name,category1_idfrom ${APP}.ods_base_category2_fullwhere dt='$do_date'
),
c1 as
(selectid,namefrom ${APP}.ods_base_category1_fullwhere dt='$do_date'
),
tm as
(selectid,tm_namefrom ${APP}.ods_base_trademark_fullwhere dt='$do_date'
),
attr as
(selectsku_id,collect_set(named_struct('attr_id',attr_id,'value_id',value_id,'attr_name',attr_name,'value_name',value_name)) attrsfrom ${APP}.ods_sku_attr_value_fullwhere dt='$do_date'group by sku_id
),
sale_attr as
(selectsku_id,collect_set(named_struct('sale_attr_id',sale_attr_id,'sale_attr_value_id',sale_attr_value_id,'sale_attr_name',sale_attr_name,'sale_attr_value_name',sale_attr_value_name)) sale_attrsfrom ${APP}.ods_sku_sale_attr_value_fullwhere dt='$do_date'group by sku_id
)
insert overwrite table ${APP}.dim_sku_full partition(dt='$do_date')
selectsku.id,sku.price,sku.sku_name,sku.sku_desc,sku.weight,sku.is_sale,sku.spu_id,spu.spu_name,sku.category3_id,c3.name,c3.category2_id,c2.name,c2.category1_id,c1.name,sku.tm_id,tm.tm_name,attr.attrs,sale_attr.sale_attrs,sku.create_time
from sku
left join spu on sku.spu_id=spu.id
left join c3 on sku.category3_id=c3.id
left join c2 on c3.category2_id=c2.id
left join c1 on c2.category1_id=c1.id
left join tm on sku.tm_id=tm.id
left join attr on sku.id=attr.sku_id
left join sale_attr on sku.id=sale_attr.sku_id;
"dim_province_full="
insert overwrite table ${APP}.dim_province_full partition(dt='$do_date')
selectprovince.id,province.name,province.area_code,province.iso_code,province.iso_3166_2,region_id,region_name
from
(selectid,name,region_id,area_code,iso_code,iso_3166_2from ${APP}.ods_base_province_fullwhere dt='$do_date'
)province
left join
(selectid,region_namefrom ${APP}.ods_base_region_fullwhere dt='$do_date'
)region
on province.region_id=region.id;
"dim_coupon_full="
insert overwrite table ${APP}.dim_coupon_full partition(dt='$do_date')
selectid,coupon_name,coupon_type,coupon_dic.dic_name,condition_amount,condition_num,activity_id,benefit_amount,benefit_discount,case coupon_typewhen '3201' then concat('满',condition_amount,'元减',benefit_amount,'元')when '3202' then concat('满',condition_num,'件打',10*(1-benefit_discount),'折')when '3203' then concat('减',benefit_amount,'元')end benefit_rule,create_time,range_type,range_dic.dic_name,limit_num,taken_count,start_time,end_time,operate_time,expire_time
from
(selectid,coupon_name,coupon_type,condition_amount,condition_num,activity_id,benefit_amount,benefit_discount,create_time,range_type,limit_num,taken_count,start_time,end_time,operate_time,expire_timefrom ${APP}.ods_coupon_info_fullwhere dt='$do_date'
)ci
left join
(selectdic_code,dic_namefrom ${APP}.ods_base_dic_fullwhere dt='$do_date'and parent_code='32'
)coupon_dic
on ci.coupon_type=coupon_dic.dic_code
left join
(selectdic_code,dic_namefrom ${APP}.ods_base_dic_fullwhere dt='$do_date'and parent_code='33'
)range_dic
on ci.range_type=range_dic.dic_code;
"dim_activity_full="
insert overwrite table ${APP}.dim_activity_full partition(dt='$do_date')
selectrule.id,info.id,activity_name,rule.activity_type,dic.dic_name,activity_desc,start_time,end_time,create_time,condition_amount,condition_num,benefit_amount,benefit_discount,case rule.activity_typewhen '3101' then concat('满',condition_amount,'元减',benefit_amount,'元')when '3102' then concat('满',condition_num,'件打',10*(1-benefit_discount),'折')when '3103' then concat('打',10*(1-benefit_discount),'折')end benefit_rule,benefit_level
from
(selectid,activity_id,activity_type,condition_amount,condition_num,benefit_amount,benefit_discount,benefit_levelfrom ${APP}.ods_activity_rule_fullwhere dt='$do_date'
)rule
left join
(selectid,activity_name,activity_type,activity_desc,start_time,end_time,create_timefrom ${APP}.ods_activity_info_fullwhere dt='$do_date'
)info
on rule.activity_id=info.id
left join
(selectdic_code,dic_namefrom ${APP}.ods_base_dic_fullwhere dt='$do_date'and parent_code='31'
)dic
on rule.activity_type=dic.dic_code;
"case $1 in
"dim_user_zip")hive -e "$dim_user_zip"
;;
"dim_sku_full")hive -e "$dim_sku_full"
;;
"dim_province_full")hive -e "$dim_province_full"
;;
"dim_coupon_full")hive -e "$dim_coupon_full"
;;
"dim_activity_full")hive -e "$dim_activity_full"
;;
"all")hive -e "$dim_user_zip$dim_sku_full$dim_province_full$dim_coupon_full$dim_activity_full"
;;
esac

(3)增加脚本执行权限

[atguigu@hadoop102 bin]$ chmod +x ods_to_dim_init.sh

(4)脚本用法

[atguigu@hadoop102 bin]$ ods_to_dim_init.sh all 2020-06-14

8.7.2 每日装载脚本

(1)在hadoop102的/home/atguigu/bin目录下创建ods_to_dim.sh

[atguigu@hadoop102 bin]$ vim ods_to_dim.sh

(2)编写如下内容

#!/bin/bashAPP=gmall# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$2" ] ;thendo_date=$2
else do_date=`date -d "-1 day" +%F`
fidim_user_zip="
set hive.exec.dynamic.partition.mode=nonstrict;
with
tmp as
(selectold.id old_id,old.login_name old_login_name,old.nick_name old_nick_name,old.name old_name,old.phone_num old_phone_num,old.email old_email,old.user_level old_user_level,old.birthday old_birthday,old.gender old_gender,old.create_time old_create_time,old.operate_time old_operate_time,old.start_date old_start_date,old.end_date old_end_date,new.id new_id,new.login_name new_login_name,new.nick_name new_nick_name,new.name new_name,new.phone_num new_phone_num,new.email new_email,new.user_level new_user_level,new.birthday new_birthday,new.gender new_gender,new.create_time new_create_time,new.operate_time new_operate_time,new.start_date new_start_date,new.end_date new_end_datefrom(selectid,login_name,nick_name,name,phone_num,email,user_level,birthday,gender,create_time,operate_time,start_date,end_datefrom ${APP}.dim_user_zipwhere dt='9999-12-31')oldfull outer join(selectid,login_name,nick_name,md5(name) name,md5(phone_num) phone_num,md5(email) email,user_level,birthday,gender,create_time,operate_time,'$do_date' start_date,'9999-12-31' end_datefrom(selectdata.id,data.login_name,data.nick_name,data.name,data.phone_num,data.email,data.user_level,data.birthday,data.gender,data.create_time,data.operate_time,row_number() over (partition by data.id order by ts desc) rnfrom ${APP}.ods_user_info_incwhere dt='$do_date')t1where rn=1)newon old.id=new.id
)
insert overwrite table ${APP}.dim_user_zip partition(dt)
selectif(new_id is not null,new_id,old_id),if(new_id is not null,new_login_name,old_login_name),if(new_id is not null,new_nick_name,old_nick_name),if(new_id is not null,new_name,old_name),if(new_id is not null,new_phone_num,old_phone_num),if(new_id is not null,new_email,old_email),if(new_id is not null,new_user_level,old_user_level),if(new_id is not null,new_birthday,old_birthday),if(new_id is not null,new_gender,old_gender),if(new_id is not null,new_create_time,old_create_time),if(new_id is not null,new_operate_time,old_operate_time),if(new_id is not null,new_start_date,old_start_date),if(new_id is not null,new_end_date,old_end_date),if(new_id is not null,new_end_date,old_end_date) dt
from tmp
union all
selectold_id,old_login_name,old_nick_name,old_name,old_phone_num,old_email,old_user_level,old_birthday,old_gender,old_create_time,old_operate_time,old_start_date,cast(date_add('$do_date',-1) as string) old_end_date,cast(date_add('$do_date',-1) as string) dt
from tmp
where old_id is not null
and new_id is not null;
"dim_sku_full="
with
sku as
(selectid,price,sku_name,sku_desc,weight,is_sale,spu_id,category3_id,tm_id,create_timefrom ${APP}.ods_sku_info_fullwhere dt='$do_date'
),
spu as
(selectid,spu_namefrom ${APP}.ods_spu_info_fullwhere dt='$do_date'
),
c3 as
(selectid,name,category2_idfrom ${APP}.ods_base_category3_fullwhere dt='$do_date'
),
c2 as
(selectid,name,category1_idfrom ${APP}.ods_base_category2_fullwhere dt='$do_date'
),
c1 as
(selectid,namefrom ${APP}.ods_base_category1_fullwhere dt='$do_date'
),
tm as
(selectid,tm_namefrom ${APP}.ods_base_trademark_fullwhere dt='$do_date'
),
attr as
(selectsku_id,collect_set(named_struct('attr_id',attr_id,'value_id',value_id,'attr_name',attr_name,'value_name',value_name)) attrsfrom ${APP}.ods_sku_attr_value_fullwhere dt='$do_date'group by sku_id
),
sale_attr as
(selectsku_id,collect_set(named_struct('sale_attr_id',sale_attr_id,'sale_attr_value_id',sale_attr_value_id,'sale_attr_name',sale_attr_name,'sale_attr_value_name',sale_attr_value_name)) sale_attrsfrom ${APP}.ods_sku_sale_attr_value_fullwhere dt='$do_date'group by sku_id
)
insert overwrite table ${APP}.dim_sku_full partition(dt='$do_date')
selectsku.id,sku.price,sku.sku_name,sku.sku_desc,sku.weight,sku.is_sale,sku.spu_id,spu.spu_name,sku.category3_id,c3.name,c3.category2_id,c2.name,c2.category1_id,c1.name,sku.tm_id,tm.tm_name,attr.attrs,sale_attr.sale_attrs,sku.create_time
from sku
left join spu on sku.spu_id=spu.id
left join c3 on sku.category3_id=c3.id
left join c2 on c3.category2_id=c2.id
left join c1 on c2.category1_id=c1.id
left join tm on sku.tm_id=tm.id
left join attr on sku.id=attr.sku_id
left join sale_attr on sku.id=sale_attr.sku_id;
"dim_province_full="
insert overwrite table ${APP}.dim_province_full partition(dt='$do_date')
selectprovince.id,province.name,province.area_code,province.iso_code,province.iso_3166_2,region_id,region_name
from
(selectid,name,region_id,area_code,iso_code,iso_3166_2from ${APP}.ods_base_province_fullwhere dt='$do_date'
)province
left join
(selectid,region_namefrom ${APP}.ods_base_region_fullwhere dt='$do_date'
)region
on province.region_id=region.id;
"dim_coupon_full="
insert overwrite table ${APP}.dim_coupon_full partition(dt='$do_date')
selectid,coupon_name,coupon_type,coupon_dic.dic_name,condition_amount,condition_num,activity_id,benefit_amount,benefit_discount,case coupon_typewhen '3201' then concat('满',condition_amount,'元减',benefit_amount,'元')when '3202' then concat('满',condition_num,'件打',10*(1-benefit_discount),'折')when '3203' then concat('减',benefit_amount,'元')end benefit_rule,create_time,range_type,range_dic.dic_name,limit_num,taken_count,start_time,end_time,operate_time,expire_time
from
(selectid,coupon_name,coupon_type,condition_amount,condition_num,activity_id,benefit_amount,benefit_discount,create_time,range_type,limit_num,taken_count,start_time,end_time,operate_time,expire_timefrom ${APP}.ods_coupon_info_fullwhere dt='$do_date'
)ci
left join
(selectdic_code,dic_namefrom ${APP}.ods_base_dic_fullwhere dt='$do_date'and parent_code='32'
)coupon_dic
on ci.coupon_type=coupon_dic.dic_code
left join
(selectdic_code,dic_namefrom ${APP}.ods_base_dic_fullwhere dt='$do_date'and parent_code='33'
)range_dic
on ci.range_type=range_dic.dic_code;
"dim_activity_full="
insert overwrite table ${APP}.dim_activity_full partition(dt='$do_date')
selectrule.id,info.id,activity_name,rule.activity_type,dic.dic_name,activity_desc,start_time,end_time,create_time,condition_amount,condition_num,benefit_amount,benefit_discount,case rule.activity_typewhen '3101' then concat('满',condition_amount,'元减',benefit_amount,'元')when '3102' then concat('满',condition_num,'件打',10*(1-benefit_discount),'折')when '3103' then concat('打',10*(1-benefit_discount),'折')end benefit_rule,benefit_level
from
(selectid,activity_id,activity_type,condition_amount,condition_num,benefit_amount,benefit_discount,benefit_levelfrom ${APP}.ods_activity_rule_fullwhere dt='$do_date'
)rule
left join
(selectid,activity_name,activity_type,activity_desc,start_time,end_time,create_timefrom ${APP}.ods_activity_info_fullwhere dt='$do_date'
)info
on rule.activity_id=info.id
left join
(selectdic_code,dic_namefrom ${APP}.ods_base_dic_fullwhere dt='$do_date'and parent_code='31'
)dic
on rule.activity_type=dic.dic_code;
"case $1 in
"dim_user_zip")hive -e "$dim_user_zip"
;;
"dim_sku_full")hive -e "$dim_sku_full"
;;
"dim_province_full")hive -e "$dim_province_full"
;;
"dim_coupon_full")hive -e "$dim_coupon_full"
;;
"dim_activity_full")hive -e "$dim_activity_full"
;;
"all")hive -e "$dim_user_zip$dim_sku_full$dim_province_full$dim_coupon_full$dim_activity_full"
;;
esac

(3)增加脚本执行权限

[atguigu@hadoop102 bin]$ chmod +x ods_to_dim.sh

(4)脚本用法

[atguigu@hadoop102 bin]$ ods_to_dim.sh all 2020-06-14