# Hadoop
install build dependencies:

```sh
sudo apt-get install maven protobuf-compiler build-essential g++ autoconf automake libtool cmake zlib1g-dev pkg-config libssl-dev
```

build protobuf 2.5.0 (the Hadoop 2.6 native build requires protoc 2.5.0 exactly):

```sh
wget 'https://github.com/google/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.gz'
tar xzvf protobuf-2.5.0.tar.gz
cd protobuf-2.5.0
./configure
make && sudo make install
sudo ldconfig  # refresh the linker cache so protoc can find libprotobuf
```
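If both the distro protobuf-compiler and the source build end up installed, make sure the 2.5.0 build wins on PATH before building Hadoop:

```sh
protoc --version  # should print: libprotoc 2.5.0
```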
build the Hadoop 2.6.2 native libraries and copy them into the installation under /opt/hadoop:

```sh
wget 'http://ftp.piotrkosoft.net/pub/mirrors/ftp.apache.org/hadoop/common/hadoop-2.6.2/hadoop-2.6.2-src.tar.gz'
tar zxf hadoop-2.6.2-src.tar.gz
cd hadoop-2.6.2-src
export JAVA_HOME=/usr/lib/jvm/java-7-oracle
mvn package -Pdist,native -DskipTests -Dtar
cp ./hadoop-dist/target/hadoop-2.6.2/lib/native/* /opt/hadoop/lib/native/
```
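To confirm that Hadoop actually loads the native libraries (and the compression codecs they provide), `checknative` reports what was found:

```sh
/opt/hadoop/bin/hadoop checknative -a
```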
etc/hadoop/core-site.xml:

```xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:8020</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/storage/hadoop</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit.skip.checksum</name>
    <value>false</value>
  </property>
</configuration>
```
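Note: short-circuit reads also require `dfs.domain.socket.path` on both the client and the DataNode; it is not in the original notes, but without it `dfs.client.read.shortcircuit` stays inactive. A minimal sketch (the socket path is just a common choice, and its directory must already exist):

```xml
<property>
  <name>dfs.domain.socket.path</name>
  <value>/var/lib/hadoop-hdfs/dn_socket</value>
</property>
```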
etc/hadoop/hdfs-site.xml:

```xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>localhost:50070</value>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>localhost:50090</value>
  </property>
  <property>
    <name>dfs.datanode.max.locked.memory</name>
    <value>8000000000</value>
  </property>
  <property>
    <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
    <value>true</value>
  </property>
</configuration>
```
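`dfs.datanode.max.locked.memory` (used by the centralized cache management commands further down) must fit within the DataNode user's memlock ulimit, otherwise the DataNode refuses to start. Quick check:

```sh
ulimit -l  # reported in kB; must cover the 8000000000 bytes configured above (~7812500 kB)
```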
format:

```sh
bin/hdfs namenode -format
```

list nodes:

```sh
bin/hdfs dfsadmin -report
```

start:

```sh
sbin/start-dfs.sh
```

web interfaces:

* namenode: http://localhost:50070/
* datanode: http://localhost:50075/
* webhdfs: http://localhost:50070/webhdfs/v1/?op=GETHOMEDIRECTORY
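WebHDFS is a plain REST API, so it can be driven with curl; for example, listing a directory (`/tmp` here is just an example path):

```sh
curl -i 'http://localhost:50070/webhdfs/v1/tmp?op=LISTSTATUS'
```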
mount HDFS through FUSE using the CDH5 hadoop-hdfs-fuse package.

ubuntu >= 14:

```sh
wget https://archive.cloudera.com/cdh5/ubuntu/trusty/amd64/cdh/archive.key -O archive.key
sudo apt-key add archive.key
sudo wget 'https://archive.cloudera.com/cdh5/ubuntu/trusty/amd64/cdh/cloudera.list' \
    -O /etc/apt/sources.list.d/cloudera.list
```

debian wheezy:

```sh
wget https://archive.cloudera.com/cdh5/debian/wheezy/amd64/cdh/archive.key -O archive.key
sudo apt-key add archive.key
sudo wget 'https://archive.cloudera.com/cdh5/debian/wheezy/amd64/cdh/cloudera.list' \
    -O /etc/apt/sources.list.d/cloudera.list
```

install and mount:

```sh
sudo apt-get update
sudo apt-get install hadoop-hdfs-fuse
mkdir ~/hdfs
sudo hadoop-fuse-dfs dfs://localhost:8020 ~/hdfs
```

/etc/fstab:

```
hadoop-fuse-dfs#dfs://localhost:8020 /storage/hdfs fuse allow_other,usetrash,rw 2 0
```
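With the fstab entry in place, the mount behaves like any other filesystem (the /storage/hdfs mount point must exist first):

```sh
sudo mkdir -p /storage/hdfs
sudo mount /storage/hdfs
ls /storage/hdfs
```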
wipe and reformat (destroys everything under hadoop.tmp.dir):

```sh
rm -rf /storage/hadoop/*
bin/hdfs namenode -format
```
browse files:

```sh
/opt/hadoop/bin/hadoop fs -ls /var/tmp/syncanalyst/ranks
/opt/hadoop/bin/hadoop fs -cat /var/tmp/syncanalyst/ranks/part-r-02068-35e5e2a6-0a0f-4fd0-8f26-82b481fa2ba8.snappy.parquet
```

cache a file (centralized cache management):

```sh
/opt/hadoop/bin/hdfs cacheadmin -addDirective -path /myfile -pool testPool
```
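`-addDirective` fails unless the pool already exists; create it first, then inspect the cache state:

```sh
/opt/hadoop/bin/hdfs cacheadmin -addPool testPool
/opt/hadoop/bin/hdfs cacheadmin -listPools
/opt/hadoop/bin/hdfs cacheadmin -listDirectives
```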
build parquet-mr:

```sh
git clone git@github.com:Parquet/parquet-mr.git
cd parquet-mr
mvn install
```
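The build also produces parquet-tools, which is handy for inspecting the `.snappy.parquet` files listed above; the jar path below is a guess and depends on the checked-out version:

```sh
/opt/hadoop/bin/hadoop jar parquet-tools/target/parquet-tools-*.jar schema \
    /var/tmp/syncanalyst/ranks/part-r-02068-35e5e2a6-0a0f-4fd0-8f26-82b481fa2ba8.snappy.parquet
```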