Spark day1 - TechCruncher/SparkCode GitHub Wiki
Spark Basics Day 1
// Load one day's web log from the local filesystem as an RDD of text lines.
val logdata = sc.textFile("file:/home/training/training_materials/sparkdev/data/weblogs/2013-09-15.log")

// Keep only the lines that mention a ".jpg" resource.
val jpglogs = logdata.filter(_.contains(".jpg"))

// Preview: character length of the first five JPG lines.
jpglogs.map(_.length).take(5)

// Preview: the first five lines, each split into its space-separated fields.
logdata.map(_.split(" ")).take(5)

// First space-separated field of every line (presumably the client IP -- confirm
// against the log format). This must stay an RDD: calling take() here would
// yield a local Array, which has no saveAsTextFile and would hold only a sample.
val ips = logdata.map(_.split(" ")(0))

// Print a ten-element sample on the driver.
ips.take(10).foreach(println)

// Write the complete list out as text files under the target directory.
ips.saveAsTextFile("file:/home/training/iplist")
Challenge - 1
// Read every *.log file in the weblogs directory into a single RDD of lines.
val logdataall = sc.textFile("file:/home/training/training_materials/sparkdev/data/weblogs/*.log")

// Keep only the first space-separated field of each line.
val ipsall = logdataall.map { record =>
  val fields = record.split(" ")
  fields(0)
}

// Persist the extracted first fields as text files.
ipsall.saveAsTextFile("file:/home/training/iplistall")
Challenge - 2
// Read every *.log file in the weblogs directory into a single RDD of lines.
val logdataall = sc.textFile("file:/home/training/training_materials/sparkdev/data/weblogs/*.log")

// Restrict to lines that mention an ".html" resource.
val htmllogs = logdataall.filter(_.contains(".html"))

// Emit "<field 0>/<field 2>" per line (field 0 is presumably the IP and
// field 2 the user id -- confirm against the log format).
val output = htmllogs.map { record =>
  val fields = record.split(" ")
  fields(0) + "/" + fields(2)
}

// Persist the combined strings as text files.
output.saveAsTextFile("file:/home/training/challenge2")