Impala - mwicat/personal GitHub Wiki
http://www.cloudera.com/documentation/enterprise/latest/topics/cdh_ig_cdh5_install.html
cd /etc/apt/sources.list.d/
sudo wget https://archive.cloudera.com/cdh5/debian/wheezy/amd64/cdh/cloudera.list -O cloudera.list
sudo apt-get update
sudo apt-get install hadoop
cd /etc/apt/sources.list.d/
sudo wget http://archive.cloudera.com/impala/debian/wheezy/amd64/impala/cloudera.list -O cloudera-impala.list
sudo apt-get update
sudo apt-get install impala impala-server impala-state-store impala-catalog impala-shell
sudo apt-get install postgresql
sudo service postgresql start
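sudo service postgresql initdb # RHEL/CentOS only, and it must run before the first start; Debian/Ubuntu packages create the cluster at install time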
echo 'host all all 127.0.0.1/32 md5' | sudo tee -a /etc/postgresql/8.4/main/pg_hba.conf # adjust 8.4 to the installed version; on RHEL/CentOS the file is /var/lib/pgsql/data/pg_hba.conf
sudo chkconfig postgresql on # RHEL/CentOS; on Debian/Ubuntu use: sudo update-rc.d postgresql defaults
sudo service postgresql restart
http://www.cloudera.com/documentation/enterprise/latest/topics/cdh_ig_hive_install.html
sudo apt-get install hive hive-metastore hive-server2
add to /etc/impala/conf/hive-site.xml (inside the <configuration> element):
<property>
<name>hive.metastore.uris</name>
<value>thrift://localhost:9083</value>
</property>
<property>
<name>hive.metastore.client.socket.timeout</name>
<value>3600</value>
<description>MetaStore Client socket timeout in seconds</description>
</property>
sudo ln -sfn /opt/hadoop/etc/hadoop/core-site.xml /etc/impala/conf/core-site.xml
sudo ln -sfn /opt/hadoop/etc/hadoop/hdfs-site.xml /etc/impala/conf/hdfs-site.xml
edit /etc/default/impala:
IMPALA_CATALOG_SERVICE_HOST=127.0.0.1
IMPALA_STATE_STORE_HOST=127.0.0.1
sudo /etc/init.d/impala-state-store start # one node
sudo /etc/init.d/impala-catalog start # one node
sudo /etc/init.d/impala-server start # all nodes
Logs: /var/log/impalad/ (or the directory set as IMPALA_LOG_DIR in /etc/default/impala)
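Web interface (default debug web UI ports): http://localhost:25000/ (impalad), http://localhost:25010/ (statestore), http://localhost:25020/ (catalog)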
HDFS caching references:
http://hadoop.apache.org/docs/r2.6.1/hadoop-project-dist/hadoop-hdfs/CentralizedCacheManagement.html#Configuration
http://www.cloudera.com/documentation/enterprise/5-4-x/topics/impala_perf_hdfs_caching.html
http://www.cloudera.com/documentation/enterprise/5-4-x/topics/impala_perf_testing.html
/opt/hadoop/bin/hdfs dfsadmin -report
/opt/hadoop/bin/hdfs cacheadmin -addPool sync_cache -owner impala -limit 8000000000
impala-shell -q "alter table ranks set cached in 'sync_cache' with replication = 2;"
/opt/hadoop/bin/hdfs cacheadmin -listDirectives -stats
impala-shell -q "show table stats ranks;"