apache flume config
# example.conf: A single-node Flume configuration
#-------------------------- Stage 2 --------------------------------
a1.sources.rr1.type = avro
a1.sources.rr1.channels = cc1
a1.sources.rr1.bind = localhost
a1.sources.rr1.port = 4141

# Define source interceptors
a1.sources.rr1.interceptors = i1
a1.sources.rr1.interceptors.i1.type = regex_filter
a1.sources.rr1.interceptors.i1.regex = (?i)(error|exception)
a1.sources.rr1.interceptors.i1.excludeEvents = false

a1.channels.cc1.type = memory
a1.channels.cc1.capacity = 300
a1.channels.cc1.transactionCapacity = 100

a1.sinks.kk1.type = file_roll
a1.sinks.kk1.sink.directory = /tmp/s2
a1.sinks.kk1.batchSize = 100
a1.sinks.kk1.sink.rollInterval = 900
a1.sinks.kk1.channel = cc1
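The rr1 avro source above listens on localhost:4141 and is normally fed by the k1 avro sink configured in Stage 1 below; with excludeEvents = false, only events whose body matches (?i)(error|exception) pass the regex_filter interceptor and reach /tmp/s2. To test this stage in isolation, a minimal sketch using the Flume client SDK could push events straight at that port (assumes flume-ng-sdk on the classpath; the class name and sample event bodies are illustrative, not part of the config):

```java
import java.nio.charset.StandardCharsets;

import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.api.RpcClient;
import org.apache.flume.api.RpcClientFactory;
import org.apache.flume.event.EventBuilder;

// Minimal test client for the rr1 avro source (host/port taken from the config above).
public class Stage2TestClient {
    public static void main(String[] args) throws EventDeliveryException {
        RpcClient client = RpcClientFactory.getDefaultInstance("localhost", 4141);
        try {
            // Matches (?i)(error|exception), so it should be kept and rolled into /tmp/s2.
            Event kept = EventBuilder.withBody("ERROR something broke", StandardCharsets.UTF_8);
            // Does not match, so the regex_filter interceptor should drop it.
            Event dropped = EventBuilder.withBody("all good", StandardCharsets.UTF_8);
            client.append(kept);
            client.append(dropped);
        } finally {
            client.close();
        }
    }
}
```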
#------------------------ Stage 1 -----------------------------------
# Name the components on this agent
a1.sources = r1 rr1
a1.sinks = k1 k2 kk1
a1.channels = c1 c2 cc1

# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
a1.sources.r1.channels = c1 c2
a1.sources.r1.selector.type = replicating

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 300
a1.channels.c1.transactionCapacity = 100

# Describe the sink -- save to local file
#a1.sinks.k1.type = file_roll
#a1.sinks.k1.sink.directory = /tmp

# Avro sink
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = localhost
a1.sinks.k1.port = 4141
a1.sinks.k1.batch-size = 100

# Bind the source and sink to the channel
a1.sinks.k1.channel = c1
#-----------------------------------------------------
#a1.sources.r1.type = exec
#a1.sources.r1.command = tail -f /tmp/test.log
#a1.sources.r1.batchSize = 1
#a1.sources.r1.interceptors = i1
#a1.sources.r1.interceptors.i1.type = timestamp

#- show in console
#a1.sinks.k1.channel = c1
#a1.sinks.k1.type = logger
#a1.sinks.k1.serializer = TEXT
#-----------------------------------------------------
# Define source interceptors
# None defined

# Use a channel which buffers events in memory
a1.channels.c2.type = memory
a1.channels.c2.capacity = 100
a1.channels.c2.transactionCapacity = 100

# Describe the sink -- save in Hadoop (HDFS)
a1.sinks.k2.type = hdfs
a1.sinks.k2.hdfs.path = hdfs://localhost:9000/user/nijovarghese/logsdata
a1.sinks.k2.hdfs.fileType = DataStream
a1.sinks.k2.hdfs.writeFormat = Text
a1.sinks.k2.serializer = text

# Bind the source and sink to the channel
a1.sinks.k2.channel = c2
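To exercise the Stage 1 pipeline end to end, anything that writes newline-terminated text to the r1 netcat source on localhost:44444 works (the Flume docs typically use telnet). A minimal sketch in plain Java, assuming the a1 agent is already running with this config (the class name and sample lines are illustrative):

```java
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.Socket;
import java.nio.charset.StandardCharsets;

// Pushes a couple of test lines into the r1 netcat source (localhost:44444 per the config).
public class NetcatFeeder {
    public static void main(String[] args) throws Exception {
        try (Socket socket = new Socket("localhost", 44444);
             PrintWriter out = new PrintWriter(
                     new OutputStreamWriter(socket.getOutputStream(), StandardCharsets.UTF_8), true)) {
            // The replicating selector copies each event to both channels: c2 lands in HDFS,
            // c1 is forwarded via the avro sink to Stage 2, where only the line containing
            // "error" passes the regex_filter interceptor.
            out.println("error: disk almost full");
            out.println("heartbeat ok");
        }
    }
}
```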