Flume 1.7 Setup
Install JDK 1.7
Set JAVA_HOME to point at the JDK 1.7 install
*********************** FLUME SET UP *******************************
#------------------------ set up conf/example.conf => configuration for source, channel, sink #------------------------
a1.sources = r1
a1.channels = c1
a1.sinks = k2
# (only the HDFS sink k2 is active; add k1 as well, e.g. "a1.sinks = k1 k2", if one of the commented-out k1 sinks below is enabled)
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
#a1.sources.r1.type = exec
#a1.sources.r1.command = tail -f /tmp/test.log
#a1.sources.r1.batchSize = 1
#a1.sources.r1.interceptors = i1
#a1.sources.r1.interceptors.i1.type = timestamp
#--save to local file
#a1.sinks.k1.channel = c1
#a1.sinks.k1.type = file_roll
#a1.sinks.k1.sink.directory = /tmp
#-show in console
#a1.sinks.k1.channel = c1
#a1.sinks.k1.type = logger
#a1.sinks.k1.serializer = TEXT
#-Save in Hadoop (HDFS)
a1.sinks.k2.channel = c1
a1.sinks.k2.type = hdfs
a1.sinks.k2.hdfs.path = hdfs://localhost:9000/user/nijovarghese/logsdata
a1.sinks.k2.hdfs.fileType = DataStream
a1.sinks.k2.hdfs.writeFormat = Text
a1.sinks.k2.serializer = text
a1.channels.c1.type = memory
a1.channels.c1.capacity = 100
a1.channels.c1.transactionCapacity = 10
a1.sources.r1.channels = c1
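For reference, here are the active (uncommented) lines above pulled together into a single copy-pasteable conf/example.conf sketch: netcat source, memory channel, HDFS sink, using the same names and paths as elsewhere on this page.

a1.sources = r1
a1.channels = c1
a1.sinks = k2

a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
a1.sources.r1.channels = c1

a1.channels.c1.type = memory
a1.channels.c1.capacity = 100
a1.channels.c1.transactionCapacity = 10

a1.sinks.k2.channel = c1
a1.sinks.k2.type = hdfs
a1.sinks.k2.hdfs.path = hdfs://localhost:9000/user/nijovarghese/logsdata
a1.sinks.k2.hdfs.fileType = DataStream
a1.sinks.k2.hdfs.writeFormat = Text
a1.sinks.k2.serializer = text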
#------------------------ set up conf/flume-env.sh => FLUME_CLASSPATH for all external jars required #------------------------
#MAC OS
FLUME_CLASSPATH=""
FLUME_CLASSPATH=/Users/nijovarghese/hadoop/hadoop-2.8.5/share/hadoop/common/*:/Users/nijovarghese/hadoop/hadoop-2.8.5/share/hadoop/hdfs/*
FLUME_CLASSPATH=$FLUME_CLASSPATH:/Users/nijovarghese/hadoop/hadoop-2.8.5/share/hadoop/common/lib/*
#EC2 configuration for FLUME_CLASSPATH
#FLUME_CLASSPATH=/home/ec2-user/hadoop-2.7.7/share/hadoop/common/*:/home/ec2-user/hadoop-2.7.7/share/hadoop/hdfs/*
#FLUME_CLASSPATH=$FLUME_CLASSPATH:/home/ec2-user/hadoop-2.7.7/share/hadoop/common/lib/*
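If conf/flume-env.sh does not exist yet, it can be created from the template that ships in the Flume tarball and the classpath lines appended to it. A minimal sketch, assuming the EC2 paths above:

cd /home/ec2-user/apache-flume-1.7.0-bin
cp conf/flume-env.sh.template conf/flume-env.sh
# append the Hadoop jars to FLUME_CLASSPATH (single quotes keep the globs and $FLUME_CLASSPATH literal in the file)
echo 'FLUME_CLASSPATH=/home/ec2-user/hadoop-2.7.7/share/hadoop/common/*:/home/ec2-user/hadoop-2.7.7/share/hadoop/hdfs/*' >> conf/flume-env.sh
echo 'FLUME_CLASSPATH=$FLUME_CLASSPATH:/home/ec2-user/hadoop-2.7.7/share/hadoop/common/lib/*' >> conf/flume-env.sh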
#------------------------ copy the hadoop jars to flume lib folder #------------------------
cp /home/ec2-user/hadoop-2.7.7/share/hadoop/common/*.jar /home/ec2-user/apache-flume-1.7.0-bin/lib/
cp /home/ec2-user/hadoop-2.7.7/share/hadoop/common/lib/*.jar /home/ec2-user/apache-flume-1.7.0-bin/lib/
Jars needed in the Flume lib folder:
core-3.1.1.jar
hadoop-core-1.2.1.jar
commons-beanutils-1.7.0.jar
commons-beanutils-core-1.8.0.jar
commons-cli-1.2.jar
commons-codec-1.4.jar
commons-collections-3.2.1.jar
commons-configuration-1.6.jar
commons-daemon-1.0.1.jar
commons-digester-1.8.jar
commons-el-1.0.jar
commons-httpclient-3.0.1.jar
commons-io-2.1.jar
commons-lang-2.4.jar
commons-logging-api-1.0.4.jar
commons-logging-1.1.1.jar
commons-math-2.1.jar
commons-net-3.1.jar
flume-ssc-client-0.0.1.jar
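A quick sanity check that the copied jars actually landed in the Flume lib folder (paths as above):

ls /home/ec2-user/apache-flume-1.7.0-bin/lib/ | grep -iE 'hadoop|commons'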
#------------------------ Start flume #------------------------
export JAVA_HOME=/Users/nijovarghese/jdk_7/
bin/flume-ng agent --conf conf --conf-file conf/example.conf --name a1 -Dflume.root.logger=DEBUG,console
#-- post some messages
for num in {1..120}; do echo $num; echo "`date` exception New Hello World X $num" | nc localhost 44444; sleep 0.01; done
echo "`date` exception New Cluster World" | nc localhost 44444
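You can also poke the source interactively. Flume's netcat source acknowledges each accepted line by default, so after typing a message into a plain nc session you should see OK echoed back:

nc localhost 44444
this is a test event
OK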
*********************** Hadoop SET UP *******************************
ssh localhost
** -> If installing Hadoop on Mac OSX, make sure Remote Login is turned on under System Preferences > Sharing. This worked on my machine.
ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
export HADOOP_PREFIX=/usr/local/hadoop
ssh localhost
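If ssh localhost still prompts for a password after the steps above, the usual culprit is file permissions on the key material rather than anything Hadoop-specific; tightening them normally fixes it:

chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys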
#------------------------ configure the hdfs data directories (etc/hadoop/hdfs-site.xml), otherwise data is stored in a temp directory and you will lose it if the Mac is restarted or the process gets killed #------------------------
<property>
  <name>dfs.namenode.name.dir</name>
  <value>/home/hduser/mydata/hdfs/namenode</value>
</property>
<property>
  <name>dfs.datanode.data.dir</name>
  <value>/home/hduser/mydata/hdfs/datanode</value>
</property>

bin/hdfs namenode -format
sbin/start-dfs.sh
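After start-dfs.sh, a quick way to confirm HDFS actually came up is to check the Java processes and the datanode report; on a single-node setup you would expect at least NameNode, DataNode and SecondaryNameNode to be listed:

jps
bin/hdfs dfsadmin -report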
bin/hdfs dfs -mkdir /user
bin/hdfs dfs -mkdir /user/nijovarghese
bin/hdfs dfs -mkdir /user/nijovarghese/logsdata
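The three mkdir calls can also be collapsed into one with -p, and once the Flume agent has delivered some events the k2 sink's output (FlumeData files, the default prefix) should be visible under that path:

bin/hdfs dfs -mkdir -p /user/nijovarghese/logsdata
bin/hdfs dfs -ls /user/nijovarghese/logsdata
bin/hdfs dfs -cat /user/nijovarghese/logsdata/FlumeData.*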