本地测试环境启动备忘 - zhongjiajie/zhongjiajie.github.com GitHub Wiki
二进制包下载,清华大学apache国内源中有较多Apache的二进制包下载,如果没有想要的包,就去别的国内源找找或者直接官网挂VPN下载
测试环境使用hadoop Pseudo-Distributed形式的启动,详情见这里
<!-- etc/hadoop/core-site.xml -->
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/tmp/apache/hadoop/tmp/</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
<!-- etc/hadoop/hdfs-site.xml -->
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/tmp/apache/hadoop/tmp/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/tmp/apache/hadoop/tmp/dfs/data</value>
</property>
</configuration>
# 测试是否免密
$ ssh localhost
# 如果不能免密则增加配置密钥
$ ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
$ chmod 0600 ~/.ssh/authorized_keys
Mac的话去System Preferences -> Sharing -> Remote Login
启动,然后ssh-copy-id <IP>
即可
export HADOOP_VERSION="2.7.7"
# Hadoop
export HADOOP_HOME=/Users/zhongjiajie/Documents/dist/hadoop-${HADOOP_VERSION}
# JAVA
export JAVA_HOME=/path/to/java
export CLASSPATH=/path/to
# 初始化文件系统
$ $HADOOP_HOME/bin/hdfs namenode -format
# 启动HDFS
$ $HADOOP_HOME/sbin/start-dfs.sh
# 运行 jps 如果有出现 DataNode NameNode 和 SecondaryNameNode 就意味着启动成功
$ jps
432
5891 DataNode
6069 Jps
5993 SecondaryNameNode
5806 NameNode
去http://localhost:50070/
检查是否成功
# 日志存在 $HADOOP_LOG_DIR 默认是 $HADOOP_HOME/logs
# 创建用户目录
$ $HADOOP_HOME/bin/hdfs dfs -mkdir /user
$ $HADOOP_HOME/bin/hdfs dfs -mkdir /user/<username>
# 将 /etc/hadoop 文件夹的内容复制到 hdfs 的 input 文件夹 这里的 hdfs 路径是相对 /user/<username>
$HADOOP_HOME/bin/hdfs dfs -put $HADOOP_HOME/etc/hadoop input
# 运行提供的例子
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-${HADOOP_VERSION}.jar grep input output 'dfs[a-z.]+'
# 检查输出的结果 同样第一个output是相关 /user/<username> 的路径
$HADOOP_HOME/bin/hdfs dfs -get output $HADOOP_HOME/output
cat $HADOOP_HOME/output/*
# 或者使用hdfs命令查看
$HADOOP_HOME/bin/hdfs dfs -cat output/*
$HADOOP_HOME/sbin/stop-dfs.sh
# 检查是否退出成功
jps
HIVE_VERSION=3.1.2
# cat ~/.zshrc
export HIVE_HOME=/Users/zhongjiajie/Documents/dist/apache-hive-${HIVE_VERSION}-bin
# hive-env.sh
cp $HIVE_HOME/conf/hive-env.sh.template $HIVE_HOME/conf/hive-env.sh
# hive-site.xml 自定义配置文件,默认值可参考 hive-default.xml.template
touch $HIVE_HOME/conf/hive-site.xml
$HADOOP_HOME/bin/hdfs dfs -mkdir -p /user/hive/warehouse
$HADOOP_HOME/bin/hdfs dfs -mkdir -p /user/hive/tmp
$HADOOP_HOME/bin/hdfs dfs -mkdir -p /user/hive/log
$HADOOP_HOME/bin/hdfs dfs -chmod -R 777 /user/hive/warehouse
$HADOOP_HOME/bin/hdfs dfs -chmod -R 777 /user/hive/tmp
$HADOOP_HOME/bin/hdfs dfs -chmod -R 777 /user/hive/log
# hive-env.sh
export JAVA_HOME=/opt/java # Java路径 如果通过brew安装的则不需要配置
export HADOOP_HOME=/Users/zhongjiajie/Documents/dist/hadoop-${HADOOP_VERSION} # Hadoop安装路径
export HIVE_HOME=/Users/zhongjiajie/Documents/dist/apache-hive-${HIVE_VERSION}-bin # Hive安装路径
export HIVE_CONF_DIR=${HIVE_HOME}/conf # Hive配置文件路径
<!-- hive-site.xml -->
<configuration>
<!-- 运行作业对应的文件夹 -->
<property>
<name>hive.exec.scratchdir</name>
<value>/user/hive/tmp</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
</property>
<property>
<name>hive.querylog.location</name>
<value>/user/hive/log</value>
</property>
<!-- mysql连接参数 默认是使用derby -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://127.0.0.1:3306/hive?createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8&amp;useSSL=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>mysql</value>
</property>
</configuration>
$HIVE_HOME/bin/schematool -dbType mysql -initSchema
# hive-cli
$HIVE_HOME/bin/hive
cp $HIVE_HOME/conf/hive-log4j2.properties.template $HIVE_HOME/conf/hive-log4j2.properties
# list of properties
property.hive.log.dir = /tmp/apache/hive/log
<!-- hive-site.xml -->
<configuration>
<!-- 这两个是本地路径 hiveserver2的配置 -->
<property>
<name>hive.exec.local.scratchdir</name>
<value>/tmp/apache/hive/exec</value>
<description>Local scratch space for Hive jobs</description>
</property>
<property>
<name>hive.downloaded.resources.dir</name>
<value>/tmp/apache/hive/resources/${hive.session.id}</value>
<description>Temporary local directory for added resources in the remote file system.</description>
</property>
<!-- hiveserver2连接配置 -->
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
<description>Port number of HiveServer2 Thrift interface when hive.server2.transport.mode is 'binary'.</description>
</property>
<property>
<name>hive.server2.thrift.client.user</name>
<value>zhongjiajie</value>
<description>Username to use against thrift client</description>
</property>
<property>
<name>hive.server2.thrift.client.password</name>
<value></value>
<description>Password to use against thrift client</description>
</property>
<!-- hiveserver2 webui 配置 -->
<property>
<name>hive.server2.webui.host</name>
<value>0.0.0.0</value>
<description>The host address the HiveServer2 WebUI will listen on</description>
</property>
<property>
<name>hive.server2.webui.port</name>
<value>10002</value>
<description>The port the HiveServer2 WebUI will listen on. This can be set to 0 or a negative integer to disable the web UI</description>
</property>
</configuration>
<!-- core-site.xml -->
<!-- zhongjiajie替换成beeline需要连接的用户名 -->
<configuration>
<property>
<name>hadoop.proxyuser.zhongjiajie.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.zhongjiajie.groups</name>
<value>*</value>
</property>
</configuration>
-
$HIVE_HOME/bin/beeline -u jdbc:hive2://127.0.0.1:10000/default -n zhongjiajie
或者先 $HIVE_HOME/bin/beeline
然后再!connect jdbc:hive2://<host>:<port>/<db>
- 打开网址
localhost:10002
- 查看日志
tail -f -n 100 /tmp/apache/hive/log/hive.log
测试环境使用standalone+spark-thriftserver的架构,web端口8088,spark端口7077,STS端口10080
- 启动spark-master:
$SPARK_HOME/sbin/start-master.sh --host ubuntu --port 7077 --webui-port 8088
- 启动spark-slave:
$SPARK_HOME/sbin/start-slave.sh spark://ubuntu:7077
- 启动spark-thriftserver:
$SPARK_HOME/sbin/start-thriftserver.sh --master spark://ubuntu:7077 --executor-memory 512m --conf spark.cores.max=2 --hiveconf hive.server2.thrift.port=10080
- beeline:
$SPARK_HOME/bin/beeline -u jdbc:hive2://ubuntu:10080
- 停止spark-master:
$SPARK_HOME/sbin/stop-master.sh
- 停止spark-slave:
$SPARK_HOME/sbin/stop-slave.sh
- 停止spark-thriftserver:
$SPARK_HOME/sbin/stop-thriftserver.sh
- beeline:
!q