Flume 1.7 Setup

Install JDK 1.7.

Set JAVA_HOME to point at the JDK 1.7 install directory.
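On a Mac, `/usr/libexec/java_home` can find this path for you; a minimal sketch, assuming a JDK 1.7 is installed:

```sh
# Locate the JDK 1.7 home and export it (exact path varies by machine)
export JAVA_HOME=$(/usr/libexec/java_home -v 1.7)
echo $JAVA_HOME
```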

*********************** FLUME SET UP *******************************

#------------------------ set up conf/example.conf => configuration for source, channel, sink #------------------------

```properties
# example.conf: a single-node Flume configuration

# Name the components on this agent
a1.sources = r1
a1.channels = c1
# Only k2 (HDFS) is active; add k1 to this list if you enable
# one of the commented-out k1 sinks below
a1.sinks = k2

# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444

# Alternative source: tail a log file instead of listening on netcat
#a1.sources.r1.type = exec
#a1.sources.r1.command = tail -f /tmp/test.log
#a1.sources.r1.batchSize = 1
#a1.sources.r1.interceptors = i1
#a1.sources.r1.interceptors.i1.type = timestamp

# Describe the sink

#-- save to a local file
#a1.sinks.k1.channel = c1
#a1.sinks.k1.type = file_roll
#a1.sinks.k1.sink.directory = /tmp

#-- show in console
#a1.sinks.k1.channel = c1
#a1.sinks.k1.type = logger
#a1.sinks.k1.serializer = TEXT

#-- save in Hadoop
a1.sinks.k2.channel = c1
a1.sinks.k2.type = hdfs
a1.sinks.k2.hdfs.path = hdfs://localhost:9000/user/nijovarghese/logsdata
a1.sinks.k2.hdfs.fileType = DataStream
a1.sinks.k2.hdfs.writeFormat = Text
a1.sinks.k2.serializer = text

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 100
a1.channels.c1.transactionCapacity = 10

# Bind the source to the channel (the sink's channel is set via k2.channel above)
a1.sources.r1.channels = c1
```
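One thing to note with the settings above: the HDFS sink rolls to a new file every 30 seconds by default, so the netcat test below produces many small files. If that matters, roll behaviour can be tuned on the k2 sink; the values below are illustrative, not part of the original config:

```properties
# Roll a new file every 10 minutes or at ~64 MB, whichever comes first;
# rollCount = 0 disables rolling by event count
a1.sinks.k2.hdfs.rollInterval = 600
a1.sinks.k2.hdfs.rollSize = 67108864
a1.sinks.k2.hdfs.rollCount = 0
```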

#------------------------ set up conf/flume-env.sh => FLUME_CLASSPATH for all required external jars #------------------------
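If conf/flume-env.sh does not exist yet, the Flume distribution ships a template for it:

```sh
cd apache-flume-1.7.0-bin
cp conf/flume-env.sh.template conf/flume-env.sh
```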

```sh
# MAC OS
FLUME_CLASSPATH=""
FLUME_CLASSPATH=/Users/nijovarghese/hadoop/hadoop-2.8.5/share/hadoop/common/*:/Users/nijovarghese/hadoop/hadoop-2.8.5/share/hadoop/hdfs/*
FLUME_CLASSPATH=$FLUME_CLASSPATH:/Users/nijovarghese/hadoop/hadoop-2.8.5/share/hadoop/common/lib/*

# EC2 configuration for FLUME_CLASSPATH
#FLUME_CLASSPATH=/home/ec2-user/hadoop-2.7.7/share/hadoop/common/*:/home/ec2-user/hadoop-2.7.7/share/hadoop/hdfs/*
#FLUME_CLASSPATH=$FLUME_CLASSPATH:/home/ec2-user/hadoop-2.7.7/share/hadoop/common/lib/*
```

#------------------------ copy the hadoop jars to flume lib folder #------------------------

```sh
cp /home/ec2-user/hadoop-2.7.7/share/hadoop/common/*.jar /home/ec2-user/apache-flume-1.7.0-bin/lib/
cp /home/ec2-user/hadoop-2.7.7/share/hadoop/common/lib/*.jar /home/ec2-user/apache-flume-1.7.0-bin/lib/
```
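A quick sanity check that the jars actually landed in Flume's lib folder (paths assumed from the copy commands above):

```sh
ls /home/ec2-user/apache-flume-1.7.0-bin/lib | grep -i hadoop
```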

Jars needed:

```
-rw-r--r-- 1 ccms ccms 3566844 Jan 18 14:18 core-3.1.1.jar
-rw-r--r-- 1 ccms ccms 4203147 Jan 18 14:33 hadoop-core-1.2.1.jar
-rw-r--r-- 1 ccms ccms  188671 Jan 18 16:43 commons-beanutils-1.7.0.jar
-rw-r--r-- 1 ccms ccms  206035 Jan 18 16:43 commons-beanutils-core-1.8.0.jar
-rw-r--r-- 1 ccms ccms   41123 Jan 18 16:43 commons-cli-1.2.jar
-rw-r--r-- 1 ccms ccms   58160 Jan 18 16:43 commons-codec-1.4.jar
-rw-r--r-- 1 ccms ccms  575389 Jan 18 16:43 commons-collections-3.2.1.jar
-rw-r--r-- 1 ccms ccms  298829 Jan 18 16:43 commons-configuration-1.6.jar
-rw-r--r-- 1 ccms ccms   13619 Jan 18 16:43 commons-daemon-1.0.1.jar
-rw-r--r-- 1 ccms ccms  143602 Jan 18 16:43 commons-digester-1.8.jar
-rw-r--r-- 1 ccms ccms  112341 Jan 18 16:43 commons-el-1.0.jar
-rw-r--r-- 1 ccms ccms  279781 Jan 18 16:43 commons-httpclient-3.0.1.jar
-rw-r--r-- 1 ccms ccms  163151 Jan 18 16:43 commons-io-2.1.jar
-rw-r--r-- 1 ccms ccms  261809 Jan 18 16:43 commons-lang-2.4.jar
-rw-r--r-- 1 ccms ccms   26202 Jan 18 16:43 commons-logging-api-1.0.4.jar
-rw-r--r-- 1 ccms ccms   60686 Jan 18 16:43 commons-logging-1.1.1.jar
-rw-r--r-- 1 ccms ccms  832410 Jan 18 16:43 commons-math-2.1.jar
-rw-r--r-- 1 ccms ccms  273370 Jan 18 16:43 commons-net-3.1.jar
-rw-r--r-- 1 root root    3054 Jan 30 16:57 flume-ssc-client-0.0.1.jar
```

#------------------------ Start flume #------------------------

```sh
export JAVA_HOME=/Users/nijovarghese/jdk_7/
bin/flume-ng agent --conf conf --conf-file conf/example.conf --name a1 -Dflume.root.logger=DEBUG,console
```

#-- post some messages

```sh
for num in {1..120}; do echo $num; echo "date exception New Hello World X $num" | nc localhost 44444; sleep 0.01; done

echo "date exception New Cluster World" | nc localhost 44444
```

*********************** Hadoop SET UP *******************************

```sh
ssh localhost
```

** -> If installing Hadoop on Mac OS X, make sure Remote Login is turned on under System Preferences > Sharing (near File Sharing). This worked on my machine.

If that fails, set up SSH keys:

```sh
ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
export HADOOP_PREFIX=/usr/local/hadoop
```

```sh
ssh localhost
```
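If `ssh localhost` still prompts for a password after adding the key, file permissions are the usual culprit:

```sh
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys
```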

#------------------------ configure the hdfs data directories, otherwise data is stored under a temp directory and is lost when the Mac restarts or the process is killed #------------------------

Add to etc/hadoop/hdfs-site.xml:

```xml
<property>
  <name>dfs.namenode.name.dir</name>
  <value>/home/hduser/mydata/hdfs/namenode</value>
</property>
<property>
  <name>dfs.datanode.data.dir</name>
  <value>/home/hduser/mydata/hdfs/datanode</value>
</property>
```
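These directories should exist and be writable by the user running Hadoop before formatting the namenode:

```sh
mkdir -p /home/hduser/mydata/hdfs/namenode /home/hduser/mydata/hdfs/datanode
```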

```sh
bin/hdfs namenode -format
sbin/start-dfs.sh
```
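After `start-dfs.sh`, `jps` (bundled with the JDK) should list NameNode, DataNode, and SecondaryNameNode; if any are missing, check the logs/ directory of the Hadoop install:

```sh
jps
```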

```sh
bin/hdfs dfs -mkdir /user
bin/hdfs dfs -mkdir /user/nijovarghese
bin/hdfs dfs -mkdir /user/nijovarghese/logsdata
```
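The three calls can be collapsed with `-p`, which creates missing parent directories:

```sh
bin/hdfs dfs -mkdir -p /user/nijovarghese/logsdata
```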
