ICP5 - GeoSnipes/Big-Data GitHub Wiki
5-2 15 Naga Venkata Satya Pranoop Mutha
5-2 23 Geovanni West
Link to histogram source: https://github.com/GeoSnipes/Big-Data/tree/master/ICP/icp5/documentation/histogram
/**
 * Minimal Akka HTTP server exposing the image-prediction endpoint.
 *
 * A POST to `/get_custom` carrying form data is Base64-decoded, parsed as an
 * image, written to `image.png` in the working directory and handed to
 * `IPApp.testImage`; the string it returns becomes the response body.
 */
object SimpleServer {

  /**
   * Starts the server on 127.0.0.1:8080 and blocks until RETURN is pressed on
   * stdin, then unbinds the port and terminates the actor system.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    implicit val system = ActorSystem("my-system")
    implicit val materializer = ActorMaterializer()
    implicit val executionContext = system.dispatcher

    val route: Route = cors() {
      path("get_custom") {
        post {
          entity(as[akka.http.scaladsl.model.FormData]) { formData: akka.http.scaladsl.model.FormData =>
            // NOTE(review): this decodes the string form of *all* submitted fields,
            // not one named field's value — confirm this matches what the client sends.
            val imageByte = (new BASE64Decoder()).decodeBuffer(formData.fields.toString())

            // ImageIO.read returns null when no registered reader can decode the
            // bytes; passing that null on to ImageIO.write would throw, so reject
            // the request explicitly instead.
            Option(ImageIO.read(new ByteArrayInputStream(imageByte))) match {
              case Some(image) =>
                // NOTE(review): every request writes the same file name, so
                // concurrent requests can overwrite each other's image.
                ImageIO.write(image, "png", new File("image.png"))
                complete(IPApp.testImage("image.png"))
              case None =>
                complete((
                  akka.http.scaladsl.model.StatusCodes.BadRequest,
                  "Request body could not be decoded as an image"
                ))
            }
          }
        }
      }
    }

    val bindingFuture = Http().bindAndHandle(route, "127.0.0.1", 8080)
    println(s"Server online at http://127.0.0.1:8080/\nPress RETURN to stop...")
    StdIn.readLine() // Run until user presses return
    bindingFuture
      .flatMap(_.unbind()) // trigger unbinding from the port
      .onComplete(_ => system.terminate()) // and shutdown when done
  }
}
The prediction web server is bound to port 8080. Once it receives an image, it calls IPApp, which runs the various methods needed to come up with a prediction.
/**
 * Classifies a single image file using a previously saved k-means model and
 * random forest model.
 */
object IPApp {

  /** Category labels; the random forest's numeric prediction is used as an index into this list. */
  val IMAGE_CATEGORIES = List("apple_pie","baby_back_ribs","baklava")

  /**
   * Classifies one image on Spark.
   *
   * Loads the k-means model from `IPSettings.KMEANS_PATH`, converts its cluster
   * centers into the vocabulary passed to `ImageUtils.bowDescriptors`, turns the
   * resulting descriptor into a vector, and has the random forest loaded from
   * `IPSettings.RANDOM_FOREST_PATH` predict on that vector. The vector's size
   * and contents are printed to stdout along the way.
   *
   * @param sc   Spark context used to load both models
   * @param path path of the image file to classify
   * @return a message of the form "Test image predicted as : <category>"
   */
  def testImageClassification(sc: SparkContext, path: String): String = {
    val model = KMeansModel.load(sc, IPSettings.KMEANS_PATH)
    val vocabulary = ImageUtils.vectorsToMat(model.clusterCenters)

    val desc = ImageUtils.bowDescriptors(path, vocabulary)
    val histogram = ImageUtils.matToVector(desc)

    println("-- Histogram size : " + histogram.size)
    println(histogram.toArray.mkString(" "))

    val rfModel = RandomForestModel.load(sc, IPSettings.RANDOM_FOREST_PATH)
    val p = rfModel.predict(histogram)

    // NOTE(review): assumes the model only ever predicts indices covered by
    // IMAGE_CATEGORIES; an index outside the list throws here.
    "Test image predicted as : " + IMAGE_CATEGORIES(p.toInt)
  }

  /**
   * Obtains (or creates) a local Spark context, classifies the image at the
   * given path, prints the result to stdout and returns it.
   *
   * @param string path of the image file to classify
   * @return the message produced by [[testImageClassification]]
   */
  def testImage(string: String): String = {
    // A second SparkConf carrying executor/driver memory settings used to be
    // built here but was never passed to Spark; only this configuration was
    // ever in effect, so the unused one has been dropped.
    val sparkConf = new SparkConf().setAppName("ImgClassifier").setMaster("local[*]")
    val sc = SparkContext.getOrCreate(sparkConf)

    val res = testImageClassification(sc, string)
    // print, not printf: the result is plain text and must not be interpreted
    // as a format string.
    print(res)
    res
  }
}
IPApp then takes the image and generates data based on that image in the form of a histogram. It then outputs its prediction using the model generated by Random Forest.
A few of the predictions given:
Below is the best confusion matrix generated. It has been formatted to make it more readable but it was taken straight from the console.
Our model had a prediction accuracy of 64%.

Source Code for Part 1: https://github.com/GeoSnipes/Big-Data/tree/master/ICP/icp5/src/image_classification/src/main/scala




