Hive 3 from Beginner to Master
Environment:
- openEuler
- MySQL8
- Hive3
Start Hadoop
# start hadoop
start-all.sh
# check the hadoop processes
jps
# check the listening ports
netstat -aplnt | grep java
Check that MySQL is running
ps aux | grep mysql
netstat -aplnt | grep 3306
Install Hive
# upload the tarball to /opt/soft
# unpack hive
tar -zxvf apache-hive-3.1.3-bin.tar.gz
# rename the directory
mv apache-hive-3.1.3-bin hive3
# enter the configuration directory
cd /opt/soft/hive3/conf
# copy the configuration templates
cp hive-env.sh.template hive-env.sh
cp hive-default.xml.template hive-site.xml
# edit the environment file
vim hive-env.sh
# edit the main configuration file
vim hive-site.xml
hive-env.sh
# hadoop installation path
export HADOOP_HOME=/opt/soft/hadoop3/
# hive configuration directory
export HIVE_CONF_DIR=/opt/soft/hive3/conf/
hive-site.xml
The settings that need to be changed are summarized below:
<configuration>
  <!-- store Hive metadata in MySQL -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://spark03:3306/hive?useUnicode=true&amp;createDatabaseIfNotExist=true&amp;characterEncoding=UTF8&amp;useSSL=false&amp;serverTimezone=Asia/Shanghai</value>
  </property>
  <!-- JDBC MySQL driver -->
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.cj.jdbc.Driver</value>
  </property>
  <!-- MySQL user name and password -->
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>Lihaozhe!!@@1122</value>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive/warehouse</value>
  </property>
  <property>
    <name>hive.exec.scratchdir</name>
    <value>/user/hive/tmp</value>
  </property>
  <property>
    <name>hive.exec.local.scratchdir</name>
    <value>/user/hive/local</value>
    <description>Local scratch space for Hive jobs</description>
  </property>
  <property>
    <name>hive.downloaded.resources.dir</name>
    <value>/user/hive/resources</value>
    <description>Temporary local directory for added resources in the remote file system.</description>
  </property>
  <!-- query log directory -->
  <property>
    <name>hive.querylog.location</name>
    <value>/user/hive/log</value>
  </property>
  <!-- metastore node -->
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://spark01:9083</value>
  </property>
  <!-- port for remote client connections -->
  <property>
    <name>hive.server2.thrift.port</name>
    <value>10000</value>
  </property>
  <property>
    <name>hive.server2.thrift.bind.host</name>
    <value>0.0.0.0</value>
  </property>
  <property>
    <name>hive.server2.webui.host</name>
    <value>0.0.0.0</value>
  </property>
  <!-- port of the HiveServer2 web UI -->
  <property>
    <name>hive.server2.webui.port</name>
    <value>10002</value>
  </property>
  <property>
    <name>hive.server2.long.polling.timeout</name>
    <value>5000</value>
  </property>
  <property>
    <name>hive.server2.enable.doAs</name>
    <value>true</value>
  </property>
  <!--
  <property>
    <name>datanucleus.autoCreateSchema</name>
    <value>false</value>
  </property>
  <property>
    <name>datanucleus.fixedDatastore</name>
    <value>true</value>
  </property>
  -->
  <property>
    <name>hive.execution.engine</name>
    <value>mr</value>
  </property>
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
    <description>
      Enforce metastore schema version consistency.
      True: Verify that version information stored in the metastore is compatible with the version from the Hive jars. Also disable automatic schema migration; users are required to manually migrate the schema after a Hive upgrade, which ensures proper metastore schema migration. (Default)
      False: Warn if the version information stored in the metastore doesn't match the version of the Hive jars.
    </description>
  </property>
</configuration>
Note: apply the path change above as a global substitution in the vi editor:
:%s@\${system:java.io.tmpdir}@/tmp/hive-log@g
Do not use a graphical editor: every save writes a stray 0x08 control character into line 3215. If it appears, simply delete it; the exact error it causes is described separately below.
Upload the MySQL connector jar files to the lib directory of the Hive installation:
/opt/soft/hive3/lib
There are two jars:
- mysql-connector-j-8.0.32.jar
- protobuf-java-3.22.2.jar
Delete the old protobuf-java-2.5.0.jar.
Guava version conflict
Delete guava-19.0.jar from the hive/lib directory.
Copy guava-27.0-jre.jar from hadoop/share/hadoop/common/lib into hive/lib:
rm -f /opt/soft/hive3/lib/guava-19.0.jar
cp -v /opt/soft/hadoop3/share/hadoop/common/lib/guava-27.0-jre.jar /opt/soft/hive3/lib
Configure environment variables
vim /etc/profile
export HIVE_HOME=/opt/soft/hive3
export PATH=$PATH:$HIVE_HOME/bin
source /etc/profile
Initialize the Hive metastore database
Note: make sure Hadoop and MySQL are up before initializing the metastore.
schematool -initSchema -dbType mysql
Troubleshooting:
Exception in thread "main" java.lang.RuntimeException: com.ctc.wstx.exc.WstxParsingException: Illegal character entity: expansion character (code 0x8
at [row,col,system-id]: [3215,96,"file:/usr/local/hive/conf/hive-site.xml"]
at org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:3051)
...
at org.apache.hadoop.util.RunJar.main(RunJar.java:236)
Caused by: com.ctc.wstx.exc.WstxParsingException: Illegal character entity: expansion character (code 0x8
at [row,col,system-id]: [3215,96,"file:/usr/local/hive/conf/hive-site.xml"]
at com.ctc.wstx.sr.StreamScanner.constructWfcException(StreamScanner.java:621)
...
at org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:3034)
... 17 more
Cause: line 3215 of hive-site.xml (see the second line of the error) contains a special character.
Fix: open hive-site.xml, jump to that line, and delete the stray 0x08 control character.
Caused by: java.net.URISyntaxException: Relative path in absolute URI: ${system:java.io.tmpdir%7D/$%7Bsystem:user.name%7D
at java.net.URI.checkPath(URI.java:1822)
at java.net.URI.<init>(URI.java:745)
at org.apache.hadoop.fs.Path.initialize(Path.java:260)
Fix: in hive-site.xml, change the values of the following three properties (originally relative paths built from ${system:...} variables) to absolute paths:
hive.querylog.location
hive.exec.local.scratchdir
hive.downloaded.resources.dir
# global substitution
:%s@\${system:java.io.tmpdir}@/tmp/hive-log@g
Remote mode
# start the server processes
hive --service metastore &
hive --service hiveserver2 &
# run them in the background
nohup hive --service metastore > /dev/null 2>&1 &
nohup hive --service hiveserver2 > /dev/null 2>&1 &
# hiveserver2 can also be started via its own script
nohup hiveserver2 > /dev/null 2>&1 &
# connect with a client
hive
beeline -u jdbc:hive2://spark01:10000 -n root
0: jdbc:hive2://spark01:10000> show databases;
Hands-on
use default;
create table person (
    id int,
    phonenum bigint,
    salary decimal(10,2),
    name string
);
show tables;
insert into person values (1001,13966668888,9999.99,"张三");
Contents of /root/person.txt:
songsong,bingbing_lili,xiao song:18_xiaoxiao song:19
longlong,pingping_liuliu,xiao long:8_xiaoxiao long:9
drop table person;
create table person (
    name string,
    friends array<string>,
    childrens map<string,int>
)
row format delimited
fields terminated by ','
collection items terminated by '_'
map keys terminated by ':'
lines terminated by '\n';
load data local inpath '/root/person.txt' into table person;
drop table data;
create table data (
    name string,
    amount int
)
row format delimited
fields terminated by ','
lines terminated by '\n';
load data local inpath '/root/data.txt' into table data;
select count(*) from data;
select count(*) from data group by name;
select name, max(amount) from data group by name;
select name, max(amount) from data group by name order by max(amount);
DDL
Working with databases
Create a database
-- create a database without specifying a location
create database db_hive01;
-- create a database at a specific HDFS path
create database db_hive02 location '/db_hive02';
-- create a database with dbproperties
create database db_hive03 with dbproperties ('create-date'='2023-04-17','create_author'='lihaozhe');
Query databases
-- list all databases
show databases;
-- fuzzy-match database names
-- * matches anything
-- | means or
show databases like 'db_hive*';
-- show database info
desc database db_hive03;
-- show detailed database info
describe database db_hive03;
-- show even more detailed database info
describe database extended db_hive03;
Alter a database
-- modify dbproperties
alter database db_hive03 set dbproperties ('create_date'='2023-04-18');
-- modify the location
alter database db_hive02 set location '/db_hive002';
-- change the owner user
alter database database_name set owner user lhz;
Drop a database
-- drop an empty database
drop database db_hive02 restrict;
-- drop a non-empty database
drop database db_hive03 cascade;
Switch the current database
use db_hive01;
Working with tables
Regular tables
Temporary tables: temporary
External tables: external (both keywords are sketched right below)
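A minimal sketch of the two keywords; the table and column names are illustrative, not from the original notes:
-- a temporary table exists only for the current session and is dropped on exit
create temporary table tmp_person (id int, name string);
-- an external table only registers metadata over files at the given path
create external table ext_person (id int, name string)
location '/data/ext_person';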
-- create a table from the result of a select query
create table ... as select ...
-- clone the structure of an existing table, without copying its data
create table ... like ...
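For example, assuming the person table from the hands-on section exists:
-- CTAS: the new table gets the query's schema and data
create table person_bak as select * from person;
-- LIKE: same structure as person, but empty
create table person_empty like person;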
Basic data types
Type | Description | Definition |
---|---|---|
tinyint | 1-byte signed integer | |
smallint | 2-byte signed integer | |
int | 4-byte signed integer | |
bigint | 8-byte signed integer | |
float | 4-byte single-precision floating point | |
double | 8-byte double-precision floating point | |
decimal | exact fixed-point decimal type | |
varchar | character sequence; a maximum length in [1~65535] must be specified | |
string | string; no maximum length required | |
timestamp | time | |
binary | binary data | |
boolean | true / false | |
array | a collection of elements of the same type | array<string> |
map | a set of key-value pairs with fixed key and value types | map<string,int> |
struct | composed of multiple attributes, each with its own name and type | struct<id:int,name:string> |
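struct is the only complex type not exercised later in these notes, so here is a minimal sketch; the table name, columns, and query are illustrative:
-- struct fields are declared as name:type pairs, separated in the data file
-- by the collection items delimiter, and read back with dot syntax
create table employee (
    name string,
    info struct<id:int,dept:string>
)
row format delimited
fields terminated by ','
collection items terminated by '_';
select name, info.id, info.dept from employee;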
Internal (managed) tables
A simple table
create table person (
    id int,
    phonenum bigint,
    salary decimal(10,2),
    name string
);
show tables;
insert into person values (1001,13966668888,9999.99,"张三");
Simple data types
create table data (
    name string,
    amount int
)
row format delimited
fields terminated by ','
lines terminated by '\n'
location '/user/hive/warehouse/lihaozhe.db/data';
# upload a data file to the location the table points at
hdfs dfs -put /root/data.csv /user/hive/warehouse/lihaozhe.db/data
Complex data types
vim /root/person.txt
songsong,bingbing_lili,xiao song:18_xiaoxiao song:19
longlong,pingping_liuliu,xiao long:8_xiaoxiao long:9
drop table person;
create table person (
    name string,
    friends array<string>,
    childrens map<string,int>
)
row format delimited
fields terminated by ','
collection items terminated by '_'
map keys terminated by ':'
lines terminated by '\n';
load data local inpath '/root/person.txt' into table person;
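Once the file is loaded, array elements and map values can be queried directly; the key 'xiao song' below comes from the sample file:
-- index into the array, look up a map key, and inspect the map's keys
select name, friends[0], childrens['xiao song'] from person;
select name, size(friends), map_keys(childrens) from person;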
JSON data
JSON functions
get_json_object
json_tuple
Loading data with a JSON SerDe
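The notes list these without examples, so here is a minimal sketch, assuming a file /root/person.json with one JSON object per line such as {"name":"songsong","age":18}; the JsonSerDe class ships with hive-hcatalog, whose jar may need to be added to the classpath:
-- extract a single field by JSONPath
select get_json_object('{"name":"songsong","age":18}', '$.name');
-- extract several fields at once
select json_tuple('{"name":"songsong","age":18}', 'name', 'age');
-- parse whole JSON lines with a SerDe
create table person_json (name string, age int)
row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';
load data local inpath '/root/person.json' into table person_json;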
External tables
create external table data (
    name string,
    amount int
)
row format delimited
fields terminated by ','
lines terminated by '\n'
location '/user/hive/warehouse/lihaozhe.db/data';
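Dropping an external table removes only the metadata; the files under its location stay on HDFS. To check which kind a table is, look at the Table Type field:
-- Table Type: MANAGED_TABLE or EXTERNAL_TABLE
describe formatted data;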
Viewing tables
-- list tables
show tables;
-- list tables in a specific database
show tables in lihaozhe;
-- fuzzy-match table names
-- * matches anything
-- | means or
show tables like 'per*';
-- show basic table info
describe person;
-- show detailed table info
describe extended person;
-- show detailed table info, formatted for readability
describe formatted person;