
Apache Atlas Standalone Installation

1. Install Hadoop

1. Install the JDK (omitted).

2. Passwordless SSH login
Enter the SSH directory (if this step fails, first log in to the local machine once via ssh localhost): cd ~/.ssh/
Generate a key pair; press Enter at every prompt: ssh-keygen -t rsa
Authorize the key: cat ./id_rsa.pub >> ./authorized_keys
Log in to the local machine again; no password should be needed now: ssh localhost
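Putting the steps together (a sketch; the chmod line is an extra precaution some systems need, not part of the original steps):

cd ~/.ssh/                                # run ssh localhost once first if this directory is missing
ssh-keygen -t rsa                         # press Enter at every prompt
cat ./id_rsa.pub >> ./authorized_keys     # authorize the new key
chmod 600 ./authorized_keys               # assumption: sshd may require this permission
ssh localhost                             # should now log in without a password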
3.1 Edit core-site.xml
Enter the config directory: cd /usr/local/hadoop/hadoop-3.1.3/etc/hadoop

Edit the file: sudo vim ./core-site.xml

<configuration>
 <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/usr/local/hadoop/hadoop-3.1.3/tmp</value>
        <description>Abase for other temporary directories.</description>
 </property>
 <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
 </property>
</configuration>

3.2 Edit hdfs-site.xml
sudo vim ./hdfs-site.xml

<configuration>
 <property>
    <name>dfs.replication</name>
    <value>1</value>
 </property>
 <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/usr/local/hadoop/hadoop-3.1.3/tmp/dfs/name</value>
 </property>
 <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/usr/local/hadoop/hadoop-3.1.3/tmp/dfs/data</value>
 </property>
</configuration>

To pin the NameNode web UI address explicitly, you can also add (the hostname below is from a cluster example):

<property>
    <name>dfs.namenode.http-address</name>
    <value>hadoop02:9870</value>
</property>

If you are re-initializing HDFS, first remove any stale data directories:

[root@hadoop hadoop]# rm -rf /usr/local/software/hadoop/hdfs/name/
[root@hadoop hadoop]# rm -rf /usr/local/software/hadoop/hdfs/data/

3.3 Format the NameNode
cd /usr/local/hadoop/hadoop-3.1.3
./bin/hdfs namenode -format

If you later see an error like
hadoop: ERROR: Unable to kill 95115 Stopping datanodes
delete the data left over from starting as the root user:
# rm -rf /tmp/hadoop*  /tmp/hsperfdata*
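After a successful format, start HDFS and confirm the daemons are up (a sketch; paths follow the install above):

cd /usr/local/hadoop/hadoop-3.1.3
./sbin/start-dfs.sh    # starts NameNode, DataNode, and SecondaryNameNode
jps                    # all three processes should be listed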

2. Install Hive
[root@hadoop 4_hive]# ls
apache-hive-3.1.2-bin  apache-hive-3.1.2-bin.tar.gz
[root@hadoop 4_hive]# mv apache-hive-3.1.2-bin ../hive

[root@hadoop conf]# cat hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://192.168.10.235:3306/hive139?createDatabaseIfNotExist=true</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>root</value>
  </property>
  <property>
    <name>hive.server2.thrift.client.user</name>
    <value>root</value>
  </property>
  <property>
    <name>hive.server2.thrift.client.password</name>
    <value>root</value>
  </property>
</configuration>

Copy the driver
Copy the MySQL JDBC driver jar into the hive/lib directory.
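For example (a sketch; the connector jar name and location are assumptions for illustration):

cp mysql-connector-java-5.1.49.jar /opt/app/hive/lib/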

Initialize the metastore schema:
[root@hadoop app]#  schematool -dbType mysql -initSchema
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/app/hive/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/software/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Metastore connection URL:        jdbc:mysql://192.168.10.235:3306/hive312?createDatabaseIfNotExist=true
Metastore Connection Driver :    com.mysql.jdbc.Driver
Metastore connection User:       root
Starting metastore schema initialization to 3.1.0
Initialization script hive-schema-3.1.0.mysql.sql

[root@hadoop bin]# sh hiveserver2 &
User: root is not allowed to impersonate anonymous
This is a permissions error.

Fix:
Edit the Hadoop config file core-site.xml
and add the following:

<property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
</property>
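The proxyuser settings only take effect once Hadoop reloads its configuration: either restart HDFS, or refresh the settings in place with the standard admin commands:

hdfs dfsadmin -refreshSuperUserGroupsConfiguration
yarn rmadmin -refreshSuperUserGroupsConfiguration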

Verify:
[root@hadoop bin]# ps -ef |grep hive
root      33372  28260  8 14:27 pts/2    00:00:34 /usr/java/jdk1.8.0_131/bin/java -Dproc_jar -Dproc_hiveserver2 -Dlog4j.configurationFile=hive-log4j2.properties -Djava.util.logging.config.file=/opt/app/hive/conf/parquet-logging.properties -Djline.terminal=jline.UnsupportedTerminal -Dyarn.log.dir=/usr/local/software/hadoop/logs -Dyarn.log.file=hadoop.log -Dyarn.home.dir=/usr/local/software/hadoop -Dyarn.root.logger=INFO,console -Djava.library.path=/usr/local/software/hadoop/lib/native -Xmx256m -Dhadoop.log.dir=/usr/local/software/hadoop/logs -Dhadoop.log.file=hadoop.log -Dhadoop.home.dir=/usr/local/software/hadoop -Dhadoop.id.str=root -Dhadoop.root.logger=INFO,console -Dhadoop.policy.file=hadoop-policy.xml -Dhadoop.security.logger=INFO,NullAppender org.apache.hadoop.util.RunJar /opt/app/hive/lib/hive-service-3.1.2.jar org.apache.hive.service.server.HiveServer2
root      96507  28260  0 14:34 pts/2    00:00:00 grep --color=auto hive
[root@hadoop bin]#  beeline -u jdbc:hive2://192.168.10.234:10000
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/app/hive/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/software/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Connecting to jdbc:hive2://192.168.10.234:10000
Connected to: Apache Hive (version 3.1.2)
Driver: Hive JDBC (version 3.1.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 3.1.2 by Apache Hive
0: jdbc:hive2://192.168.10.234:10000> show databases;
INFO  : Compiling command(queryId=root_20230327143450_7937d0bb-d45a-4c9d-bcbe-5b3e59d3c0be): show databases
INFO  : Concurrency mode is disabled, not creating a lock manager
INFO  : Semantic Analysis Completed (retrial = false)
INFO  : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:database_name, type:string, comment:from deserializer)], properties:null)
INFO  : Completed compiling command(queryId=root_20230327143450_7937d0bb-d45a-4c9d-bcbe-5b3e59d3c0be); Time taken: 1.034 seconds
INFO  : Concurrency mode is disabled, not creating a lock manager
INFO  : Executing command(queryId=root_20230327143450_7937d0bb-d45a-4c9d-bcbe-5b3e59d3c0be): show databases
INFO  : Starting task [Stage-0:DDL] in serial mode
INFO  : Completed executing command(queryId=root_20230327143450_7937d0bb-d45a-4c9d-bcbe-5b3e59d3c0be); Time taken: 0.041 seconds
INFO  : OK
INFO  : Concurrency mode is disabled, not creating a lock manager
+----------------+
| database_name  |
+----------------+
| default        |
+----------------+
1 row selected (1.469 seconds)
0: jdbc:hive2://192.168.10.234:10000>

3. Install ZooKeeper

The installation itself is omitted. If startup fails with:
Problem starting AdminServer on address 0.0.0.0, port 8080

change the AdminServer port in zoo.cfg:

admin.serverPort=8888
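For reference, a minimal standalone zoo.cfg might look like this (a sketch; dataDir matches the HBase config below, the other values are assumptions):

tickTime=2000
dataDir=/opt/app/zookeeper/zkData
clientPort=2181
admin.serverPort=8888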

4. Install HBase
Edit hbase-site.xml:

<configuration>
    <!-- HBase data directory on HDFS (matches fs.defaultFS port 9000) -->
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://192.168.10.234:9000/HBase</value>
    </property>

    <!-- single-node, non-distributed mode -->
    <property>
        <name>hbase.cluster.distributed</name>
        <value>false</value>
    </property>

    <property>
        <name>hbase.master.port</name>
        <value>16000</value>
    </property>

    <!-- address of the external ZooKeeper -->
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>192.168.10.234</value>
    </property>

    <!-- should match dataDir in zoo.cfg -->
    <property>
        <name>hbase.zookeeper.property.dataDir</name>
        <value>/opt/app/zookeeper/zkData</value>
    </property>
</configuration>

java.io.IOException: Could not start ZK at requested port of 2181.  ZK was started at port: 2182.  Aborting as clients (e.g. shell) will not be able to find this ZK quorum.
        at org.apache.hadoop.hbase.master.HMasterCommandLine.startMaster(HMasterCommandLine.java:217)
        at org.apache.hadoop.hbase.master.HMasterCommandLine.run(HMasterCommandLine.java:140)
        at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
        at org.apache.hadoop.hbase.util.ServerCommandLine.doMain(ServerCommandLine.java:149)
        at org.apache.hadoop.hbase.master.HMaster.main(HMaster.java:3080)
The embedded ZooKeeper came up on port 2182 instead of 2181, so tell clients to use that port by adding one entry to hbase-site.xml:

<property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2182</value>
</property>
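Then restart HBase and check that the master and region server come up (a sketch; paths per this setup):

/opt/app/hbase/bin/stop-hbase.sh     # stop HBase if it is running
/opt/app/hbase/bin/start-hbase.sh
jps | grep -E 'HMaster|HRegionServer'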

Verify:
http://192.168.10.234:16030/processRS.jsp

5. Install Solr

Edit solr.in.sh and add the ZooKeeper address.
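For example (a sketch; the ZK_HOST value is an assumption based on this machine's addresses, and SOLR_ULIMIT_CHECKS is optional):

# in solr.in.sh
ZK_HOST="192.168.10.234:2181"      # external ZooKeeper
SOLR_ULIMIT_CHECKS=false           # optional: skip the open-file-limit warning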

[root@hadoop bin]# ./solr  start &
[3] 92208
[root@hadoop bin]# *** [WARN] *** Your open file limit is currently 1024.
 It should be set to 65000 to avoid operational disruption.
 If you no longer wish to see this warning, set SOLR_ULIMIT_CHECKS to false in your profile or solr.in.sh
WARNING: Starting Solr as the root user is a security risk and not considered best practice. Exiting.
         Please consult the Reference Guide. To override this check, start with argument '-force'

Edit the limits.conf file:

sudo vim /etc/security/limits.conf

1. For "Your open file limit is currently 1024", add hard nofile and soft nofile entries.

2. For "Your Max Processes Limit is currently 47448", add hard nproc and soft nproc entries.

Together, 1 and 2 mean appending four lines to limits.conf:

* hard nofile 65535
* soft nofile 65535
* hard nproc 65535
* soft nproc 65535
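Log out and back in, then verify the new limits:

ulimit -n    # expect 65535
ulimit -u    # expect 65535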

[root@hadoop bin]# ls
init.d  install_solr_service.sh  oom_solr.sh  post  solr  solr-8983.pid  solr.cmd  solr.in.cmd  solr.in.sh
[root@hadoop bin]# ./solr start &
[1] 81578
[root@hadoop bin]# WARNING: Starting Solr as the root user is a security risk and not considered best practice. Exiting.
         Please consult the Reference Guide. To override this check, start with argument '-force'
^C
[1]+  Exit 1                ./solr start
Switch to a non-root user to start Solr:
[root@hadoop ~]# solr stop -all
bash: solr: command not found...
[root@hadoop ~]# cd /opt/app/solr/bin/
[root@hadoop bin]# ./solr stop -all
[root@hadoop bin]# ./solr -e dih 

ERROR: Unsupported example dih ! Please choose one of: cloud, dih, schemaless, or techproducts

[root@hadoop bin]#
[guolin@hadoop bin]$ ./solr start &

[1] 44230
[guolin@hadoop bin]$ Waiting up to 180 seconds to see Solr running on port 8983 [\\]
Started Solr server on port 8983 (pid=44429). Happy searching!
http://192.168.10.234:8983/solr/#/

6. Install Kafka
Unpack, then edit the server.properties file under the config directory:
set the ZooKeeper address, the listener address, and so on.
Start it:
./kafka-server-start.sh ../config/server.properties &
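For reference, the server.properties keys touched above (a minimal sketch; the values are assumptions for this single-node setup):

broker.id=0
listeners=PLAINTEXT://192.168.10.234:9092
log.dirs=/opt/app/kafka/data
zookeeper.connect=192.168.10.234:2181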

7. Install Atlas
1. Unpack
tar -zxvf apache-atlas-2.1.0-server.tar.gz

2. Integrate Atlas with HBase
In the /atlas/conf/atlas-application.properties config file, edit the following parameter (the value below comes from a three-node cluster example; for this standalone setup, point it at the local ZooKeeper):
atlas.graph.storage.hostname=hadoop102:2181,hadoop103:2181,hadoop104:2181
In the /opt/module/atlas/conf/atlas-env.sh config file, add the following:
export HBASE_CONF_DIR=/opt/module/hbase/conf

3. Integrate Atlas with Solr
Edit the atlas/conf/atlas-application.properties config file: vim /home/atlas/atlas/conf/atlas-application.properties
Comment out the Solr cloud mode properties and enable http mode (Solr's HTTP port is 8983, per the Solr install above):

#Solr cloud mode properties
#atlas.graph.index.search.solr.mode=cloud
#atlas.graph.index.search.solr.zookeeper-url=
#atlas.graph.index.search.solr.zookeeper-connect-timeout=60000
#atlas.graph.index.search.solr.zookeeper-session-timeout=60000
#atlas.graph.index.search.solr.wait-searcher=true

#Solr http mode properties
atlas.graph.index.search.solr.mode=http
atlas.graph.index.search.solr.http-urls=http://localhost:8983/solr
Copy the Solr config files shipped with Atlas: cp -rf /home/atlas/atlas/conf/solr /home/atlas/solr/atlas_conf
Create the index collection with the following command:
sudo -i -u solr /home/atlas/solr/bin/solr create -c vertex_index -d /home/atlas/solr/atlas_conf

[root@hadoop conf]# cp -rf  solr/ /opt/app/altas/solr/atlas_conf
cp: cannot create directory '/opt/app/altas/solr/atlas_conf': No such file or directory
[root@hadoop conf]# mkdir -p /opt/app/altas/solr
[root@hadoop conf]# cp -rf  solr/ /opt/app/altas/solr/atlas_conf
[root@hadoop conf]# sudo -i -u guolin  /opt/app/solr/bin/solr create -c vertex_index -d /opt/app/altas/solr/atlas_conf
Created collection 'vertex_index' with 1 shard(s), 1 replica(s) with config-set 'vertex_index'
[root@hadoop conf]#
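Note that Atlas also expects edge_index and fulltext_index collections in Solr; create them the same way as vertex_index:

sudo -i -u guolin /opt/app/solr/bin/solr create -c edge_index -d /opt/app/altas/solr/atlas_conf
sudo -i -u guolin /opt/app/solr/bin/solr create -c fulltext_index -d /opt/app/altas/solr/atlas_conf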

4. Integrate Atlas with Kafka
Edit the atlas/conf/atlas-application.properties config file: vim /home/atlas/atlas/conf/atlas-application.properties
atlas.notification.embedded=false 
atlas.kafka.data=/home/atlas/kafka/data 
atlas.kafka.zookeeper.connect=localhost:2181/kafka 
atlas.kafka.bootstrap.servers=localhost:9092
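Optionally, pre-create the notification topics Atlas uses (ATLAS_HOOK and ATLAS_ENTITIES are the standard topic names; the paths and partition counts here are assumptions):

/opt/app/kafka/bin/kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 1 --partitions 3 --topic ATLAS_HOOK
/opt/app/kafka/bin/kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 1 --partitions 3 --topic ATLAS_ENTITIES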

5. Atlas server configuration
Edit the atlas/conf/atlas-application.properties config file: vim /home/atlas/atlas/conf/atlas-application.properties
atlas.server.run.setup.on.start=false
Edit the atlas-log4j.xml file: vim /home/atlas/atlas/conf/atlas-log4j.xml
Uncomment the following block:
<appender name="perf_appender" class="org.apache.log4j.DailyRollingFileAppender">
    <param name="file" value="${atlas.log.dir}/atlas_perf.log" />
    <param name="datePattern" value="'.'yyyy-MM-dd" />
    <param name="append" value="true" />
    <layout class="org.apache.log4j.PatternLayout">
    <param name="ConversionPattern" value="%d|%t|%m%n" /></layout>
</appender>
<logger name="org.apache.atlas.perf" additivity="false">
    <level value="debug" />
    <appender-ref ref="perf_appender" />
</logger>

6. Integrate Atlas with Hive

Edit the atlas/conf/atlas-application.properties config file: vim /home/atlas/atlas/conf/atlas-application.properties
Append at the end of the file:
######### Hive Hook Configs ####### 
atlas.hook.hive.synchronous=false 
atlas.hook.hive.numRetries=3 
atlas.hook.hive.queueSize=10000 
atlas.cluster.name=primary

Edit the hive-site.xml file in the Hive install: vim hive/conf/hive-site.xml
Append inside the configuration tag:
<property>
    <name>hive.exec.post.hooks</name>
    <value>org.apache.atlas.hive.hook.HiveHook</value>
</property>

7. Install the Hive Hook
Unpack the Hive Hook: tar -zxvf apache-atlas-2.1.0-hive-hook.tar.gz
Copy the Hive Hook files into the Atlas install path: cp -r apache-atlas-hive-hook-2.1.0/* /home/atlas/atlas/
Edit the hive/conf/hive-env.sh config file: vim /home/atlas/hive/conf/hive-env.sh
export HIVE_AUX_JARS_PATH=/home/atlas/atlas/hook/hive
Copy the Atlas config file /home/atlas/atlas/conf/atlas-application.properties into the /home/atlas/hive/conf directory:
cp /home/atlas/atlas/conf/atlas-application.properties /home/atlas/hive/conf/

[root@hadoop 9_atlas]# cd apache-atlas-hive-hook-2.1.0/
[root@hadoop apache-atlas-hive-hook-2.1.0]# ls
hook  hook-bin
[root@hadoop apache-atlas-hive-hook-2.1.0]# cd /opt/app/altas/
[root@hadoop altas]# ls
bin  conf  DISCLAIMER.txt  LICENSE  models  NOTICE  server  solr  tools
[root@hadoop altas]# cp -r /opt/app/9_atlas/apache-atlas-hive-hook-2.1.0/* .
[root@hadoop altas]# ls
bin  conf  DISCLAIMER.txt  hook  hook-bin  LICENSE  models  NOTICE  server  solr  tools
[root@hadoop altas]# cd  /opt/app/hive/conf/
[root@hadoop conf]# ls
beeline-log4j2.properties.template    hive-log4j2.properties.template      llap-daemon-log4j2.properties.template
hive-default.xml.template             hive-site.xml                        nohup.out
hive-env.sh.template                  ivysettings.xml                      parquet-logging.properties
hive-exec-log4j2.properties.template  llap-cli-log4j2.properties.template
[root@hadoop conf]# cp hive-env.sh.template  hive-env.sh
[root@hadoop conf]# vi hive-env.sh
[root@hadoop conf]# cat hive-env.sh

export HIVE_AUX_JARS_PATH=/opt/app/altas/hook/hive
[root@hadoop conf]# cat /opt/app/
1_jdk8/      4_hive/      7_hbase/     altas/       hive/        nohup.out    zookeper/
2_hadoop/    5_zookeeper/ 8_solr/      hadoop/      init.sh      solr/
3_mysql/     6_kafka/     9_atlas/     hbase/       kafka/       zookeeper/
[root@hadoop conf]# cat /opt/app/altas/hook
hook/     hook-bin/
[root@hadoop conf]# cat /opt/app/altas/hook/hive/
cat: /opt/app/altas/hook/hive/: Is a directory
[root@hadoop conf]# ll
total 344
-rwxr-xr-x 1 root root   1596 Aug 23  2019 beeline-log4j2.properties.template
-rwxr-xr-x 1 root root 300482 Jul 16  2020 hive-default.xml.template
-rwxr-xr-x 1 root root   2385 Mar 31 14:17 hive-env.sh
-rwxr-xr-x 1 root root   2365 Aug 23  2019 hive-env.sh.template
-rwxr-xr-x 1 root root   2274 Aug 23  2019 hive-exec-log4j2.properties.template
-rwxr-xr-x 1 root root   3086 Aug 23  2019 hive-log4j2.properties.template
-rw-r--r-- 1 root root    965 Mar 31 11:42 hive-site.xml
-rwxr-xr-x 1 root root   2060 Aug 23  2019 ivysettings.xml
-rwxr-xr-x 1 root root   3558 Aug 23  2019 llap-cli-log4j2.properties.template
-rwxr-xr-x 1 root root   7163 Aug 23  2019 llap-daemon-log4j2.properties.template
-rw------- 1 root root   1231 Mar 27 14:34 nohup.out
-rwxr-xr-x 1 root root   2662 Aug 23  2019 parquet-logging.properties
[root@hadoop conf]# vi hive-env.sh
[root@hadoop conf]# pwd
/opt/app/hive/conf
[root@hadoop conf]# ls
beeline-log4j2.properties.template  hive-exec-log4j2.properties.template  llap-cli-log4j2.properties.template
hive-default.xml.template           hive-log4j2.properties.template       llap-daemon-log4j2.properties.template
hive-env.sh                         hive-site.xml                         nohup.out
hive-env.sh.template                ivysettings.xml                       parquet-logging.properties
[root@hadoop conf]# cp /opt/app/altas/conf/atlas-application.properties .
[root@hadoop conf]# ls
atlas-application.properties        hive-exec-log4j2.properties.template  llap-daemon-log4j2.properties.template
beeline-log4j2.properties.template  hive-log4j2.properties.template       nohup.out
hive-default.xml.template           hive-site.xml                         parquet-logging.properties
hive-env.sh                         ivysettings.xml
hive-env.sh.template                llap-cli-log4j2.properties.template
[root@hadoop conf]# cd /opt/altas/
[root@hadoop altas]# ls
[root@hadoop altas]# ls
[root@hadoop altas]# cd /opt/app/altas/
[root@hadoop altas]# ls
bin  conf  DISCLAIMER.txt  hook  hook-bin  LICENSE  models  NOTICE  server  solr  tools
[root@hadoop altas]# cd bin/
[root@hadoop bin]# ls
atlas_admin.py           atlas_kafka_setup_hook.py  atlas_stop.py                     quick_start.py
atlas_client_cmdline.py  atlas_kafka_setup.py       atlas_update_simple_auth_json.py  quick_start_v1.py
atlas_config.py          atlas_start.py             cputil.py
Run the startup script, ./atlas_start.py, and wait about two minutes:

[root@hadoop bin]# ./atlas_start.py
starting atlas on host localhost
starting atlas on port 21000
................................
Apache Atlas Server started!!!
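Once started, confirm the server answers (admin/admin is the default login on a fresh Atlas install):

curl -u admin:admin http://localhost:21000/api/atlas/admin/version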

1. Start Hadoop: from /opt/app/hadoop, run sbin/start-all.sh
2. Start ZooKeeper: /opt/app/zookeeper/bin/zkServer.sh start
3. Start Kafka: /opt/app/kafka/bin/kafka-server-start.sh -daemon /opt/app/kafka/config/server.properties
4. Start HBase: /opt/app/hbase/bin/start-hbase.sh
5. Start Solr: sudo -i -u guolin /opt/app/solr/bin/solr start
6. Start the Atlas service: enter the Atlas bin directory, cd /opt/app/altas/bin, and run ./atlas_start.py

Hadoop: http://192.168.10.234:9870/explorer.html#/
HBase: http://192.168.10.234:16030/rs-status
Solr: http://192.168.10.234:8983/solr/#/

[root@hadoop bin]# jps
# Hadoop processes
47731 ResourceManager
43726 SecondaryNameNode
37990 NameNode
50247 NodeManager
40456 DataNode
# HBase processes
20563 HMaster
21576 HRegionServer
# Atlas, Kafka, and ZooKeeper (QuorumPeerMain) processes
69444 Atlas
42216 Kafka
90014 QuorumPeerMain

99726 Jps
# Solr process (pid 44429 matches the earlier "Started Solr server on port 8983 (pid=44429)" message)
44429 jar
