apache flume config
# example.conf: A single-node Flume configuration
#-------------------------- Stage 2 --------------------------------
a1.sources.rr1.type = avro
a1.sources.rr1.channels = cc1
a1.sources.rr1.bind = localhost
a1.sources.rr1.port = 4141

# Define source interceptors
a1.sources.rr1.interceptors = i1
a1.sources.rr1.interceptors.i1.type = regex_filter
a1.sources.rr1.interceptors.i1.regex = (?i)(error|exception)
a1.sources.rr1.interceptors.i1.excludeEvents = false

a1.channels.cc1.type = memory
a1.channels.cc1.capacity = 300
a1.channels.cc1.transactionCapacity = 100

a1.sinks.kk1.type = file_roll
a1.sinks.kk1.sink.directory = /tmp/s2
a1.sinks.kk1.batchSize = 100
a1.sinks.kk1.sink.rollInterval = 900
a1.sinks.kk1.channel = cc1
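The rr1 avro source above listens on localhost:4141 and is normally fed by the k1 avro sink configured in Stage 1 below; with excludeEvents = false, only events whose body matches (?i)(error|exception) pass the regex_filter interceptor and reach /tmp/s2. To test this stage in isolation, a minimal sketch using the Flume client SDK could push events straight at that port (assumes flume-ng-sdk on the classpath; the class name and sample event bodies are illustrative, not part of the config):

```java
import java.nio.charset.StandardCharsets;

import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.api.RpcClient;
import org.apache.flume.api.RpcClientFactory;
import org.apache.flume.event.EventBuilder;

// Minimal test client for the rr1 avro source (host/port taken from the config above).
public class Stage2TestClient {
    public static void main(String[] args) throws EventDeliveryException {
        RpcClient client = RpcClientFactory.getDefaultInstance("localhost", 4141);
        try {
            // Matches (?i)(error|exception), so it should be kept and rolled into /tmp/s2.
            Event kept = EventBuilder.withBody("ERROR something broke", StandardCharsets.UTF_8);
            // Does not match, so the regex_filter interceptor should drop it.
            Event dropped = EventBuilder.withBody("all good", StandardCharsets.UTF_8);
            client.append(kept);
            client.append(dropped);
        } finally {
            client.close();
        }
    }
}
```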
#------------------------ Stage 1 -----------------------------------
# Name the components on this agent
a1.sources = r1 rr1
a1.sinks = k1 k2 kk1
a1.channels = c1 c2 cc1

# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
a1.sources.r1.channels = c1 c2
a1.sources.r1.selector.type = replicating

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 300
a1.channels.c1.transactionCapacity = 100

# Describe the sink -- save to local file
#a1.sinks.k1.type = file_roll
#a1.sinks.k1.sink.directory = /tmp

# Avro sink
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = localhost
a1.sinks.k1.port = 4141
a1.sinks.k1.batch-size = 100

# Bind the source and sink to the channel
a1.sinks.k1.channel = c1
#-----------------------------------------------------
#a1.sources.r1.type = exec
#a1.sources.r1.command = tail -f /tmp/test.log
#a1.sources.r1.batchSize = 1
#a1.sources.r1.interceptors = i1
#a1.sources.r1.interceptors.i1.type = timestamp

#- show in console
#a1.sinks.k1.channel = c1
#a1.sinks.k1.type = logger
#a1.sinks.k1.serializer = TEXT
#-----------------------------------------------------
# Define source interceptors
# None defined

# Use a channel which buffers events in memory
a1.channels.c2.type = memory
a1.channels.c2.capacity = 100
a1.channels.c2.transactionCapacity = 100

# Describe the sink -- save in Hadoop (HDFS)
a1.sinks.k2.type = hdfs
a1.sinks.k2.hdfs.path = hdfs://localhost:9000/user/nijovarghese/logsdata
a1.sinks.k2.hdfs.fileType = DataStream
a1.sinks.k2.hdfs.writeFormat = Text
a1.sinks.k2.serializer = text

# Bind the source and sink to the channel
a1.sinks.k2.channel = c2
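To exercise the Stage 1 pipeline end to end, anything that writes newline-terminated text to the r1 netcat source on localhost:44444 works (the Flume docs typically use telnet). A minimal sketch in plain Java, assuming the a1 agent is already running with this config (the class name and sample lines are illustrative):

```java
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.Socket;
import java.nio.charset.StandardCharsets;

// Pushes a couple of test lines into the r1 netcat source (localhost:44444 per the config).
public class NetcatFeeder {
    public static void main(String[] args) throws Exception {
        try (Socket socket = new Socket("localhost", 44444);
             PrintWriter out = new PrintWriter(
                     new OutputStreamWriter(socket.getOutputStream(), StandardCharsets.UTF_8), true)) {
            // The replicating selector copies each event to both channels: c2 lands in HDFS,
            // c1 is forwarded via the avro sink to Stage 2, where only the line containing
            // "error" passes the regex_filter interceptor.
            out.println("error: disk almost full");
            out.println("heartbeat ok");
        }
    }
}
```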