// LogParser - grant-guo/Ideas GitHub Wiki
import scala.collection.mutable.ListBuffer
import java.nio.charset.StandardCharsets
/**
 * The classes below parse a log string that is composed of key/value pairs separated by spaces.
 *
 * The parser uses a state machine - LogParserStateMachine.
 */
/**
 * Result of parsing one log string.
 *
 * @param kv the recorded pairs, in the order they were recorded; each entry is
 *           (key, value, quoted), where `quoted` is `true` when the value was
 *           written inside double quotes in the input and `false` otherwise
 */
// QUOTED is qualified explicitly: members of a companion object are not
// automatically in scope inside the class signature.
case class Log(kv: List[(String, String, Log.QUOTED)])

object Log {
  /** Marker stored with every pair: whether its value was double-quoted in the input. */
  type QUOTED = Boolean
}
/**
 * Entry point for parsing: feeds a log string, character by character, into a
 * [[LogParserStateMachine]] and returns what the machine recorded.
 *
 * One state machine instance is held and mutated by every call, so a single
 * LogParser must not be used from several threads at the same time.
 */
class LogParser {
  val stateMachine = new LogParserStateMachine

  /**
   * Decodes `raw` as UTF-8 and parses the resulting string.
   *
   * @param raw the UTF-8 encoded log line
   * @return the pairs recorded by the state machine
   * @throws LogFormatErrorException if the state machine rejects a character
   */
  def parse(raw: Array[Byte]): Log = {
    parse(new String(raw, StandardCharsets.UTF_8))
  }

  /**
   * Parses one log string.
   *
   * @param raw the log line
   * @return the pairs recorded by the state machine
   * @throws LogFormatErrorException if the state machine rejects a character
   */
  def parse(raw: String): Log = {
    // The machine is shared between calls. A previous parse that threw, or
    // that ended in the middle of a pair, would otherwise leave its state and
    // buffered characters behind and corrupt this parse.
    stateMachine.state = LogParserState.START
    stateMachine.context = stateMachine.Context.newContext

    raw.foreach(c => stateMachine.accept(c))
    stateMachine.done()
  }
}
/**
 * Character-driven state machine that splits a log string into key/value pairs.
 *
 * Feed the input one character at a time through [[accept]], then call [[done]]
 * to collect the result. Keys must start with a letter or '_'. Values are either
 * unquoted (terminated by a space or by the end of the input) or enclosed in
 * double quotes; inside a quoted value a backslash escapes the next character,
 * and both the backslash and that character are kept verbatim in the value.
 *
 * A pair that is still incomplete when [[done]] is called (a key without '=',
 * a key with no value yet, or an unterminated quoted value) is discarded.
 */
class LogParserStateMachine {
  // Local imports keep the class self-contained: the state names and the
  // QUOTED alias live in other objects and are not otherwise in scope here.
  import Log.QUOTED
  import LogParserState._

  var state: LogParserState = START
  var context: Context = Context.newContext

  /**
   * Consumes one character and advances the machine.
   *
   * @param c the next character of the log string
   * @throws LogFormatErrorException if `c` is not allowed in the current state
   */
  def accept(c: Char): Unit = {
    state match {
      case START =>
        c match {
          case ' ' =>
          case '"' => throw new LogFormatErrorException("Double quotes can't be in KEY")
          case _ =>
            // Re-dispatch so the first character of the first key is
            // validated and stored instead of being silently dropped.
            state = KEY_START
            accept(c)
        }

      case KEY_START =>
        c match {
          case ' ' =>
          case cc if cc.isLetter || cc == '_' =>
            context.key.append(cc)
            state = KEY
          case _ =>
            throw new LogFormatErrorException("KEY only can start with letters or '_' ")
        }

      case KEY =>
        c match {
          case ' ' => state = TAILING_SPACE_IN_KEY
          case '=' => state = VALUE_START
          case _ => context.key.append(c)
        }

      case TAILING_SPACE_IN_KEY =>
        // Spaces between the key and '=' are skipped; anything else means
        // the key was never followed by '='.
        c match {
          case ' ' =>
          case '=' => state = VALUE_START
          case _ => throw new LogFormatErrorException("KEY must be followed by '='")
        }

      case VALUE_START =>
        c match {
          case ' ' =>
          case '"' => state = QUOTED_VALUE
          case _ =>
            context.value.append(c)
            state = VALUE
        }

      case VALUE =>
        c match {
          case ' ' =>
            emit(quoted = false)
            state = KV_END
          case _ => context.value.append(c)
        }

      case KV_END =>
        c match {
          case ' ' | '\t' =>
          case _ =>
            // This character already belongs to the next key.
            state = KEY_START
            accept(c)
        }

      case QUOTED_VALUE =>
        c match {
          case '"' =>
            emit(quoted = true)
            state = KV_END
          case '\\' => state = ESCAPING
          case _ => context.value.append(c)
        }

      case ESCAPING =>
        // The escape sequence is stored as written: backslash plus the escaped character.
        context.value.append('\\')
        context.value.append(c)
        state = QUOTED_VALUE
    }
  }

  /**
   * Finishes the current input and returns every pair recorded so far.
   *
   * An unquoted value that runs up to the end of the input is recorded first.
   * Afterwards the machine is reset (state, buffers and recorded pairs), so it
   * can be fed a new input.
   *
   * @return the recorded pairs, in input order
   */
  def done(): Log = {
    // End of input terminates an unquoted value just like a space does.
    if (state == VALUE) emit(quoted = false)

    val ret = Log(context.cache.toList)
    context.cache.clear()
    context.key.setLength(0)
    context.value.setLength(0)
    state = START
    ret
  }

  /** Records the buffered key/value as one pair and empties both buffers. */
  private def emit(quoted: QUOTED): Unit = {
    context.cache.append((context.key.toString, context.value.toString, quoted))
    context.key.setLength(0)
    context.value.setLength(0)
  }

  /**
   * Mutable working storage of the machine.
   *
   * @param key   characters of the key currently being read
   * @param value characters of the value currently being read
   * @param cache pairs recorded so far, as (key, value, quoted)
   */
  case class Context(key: StringBuffer, value: StringBuffer, cache: ListBuffer[(String, String, QUOTED)])

  object Context {
    /** Creates a context with empty buffers and no recorded pairs. */
    def newContext: Context = Context(new StringBuffer(), new StringBuffer(), ListBuffer.empty)
  }
}
/**
 * States of the log-parsing state machine.
 *
 * The declaration order is significant: it fixes the numeric id of each state.
 */
object LogParserState extends Enumeration {
  type LogParserState = Value

  /** Initial state, before any key character has been seen. */
  val START: Value = Value

  /** Waiting for the first character of a key. */
  val KEY_START: Value = Value

  /** Reading the characters of a key. */
  val KEY: Value = Value

  /** A space was seen after the characters of a key. */
  val TAILING_SPACE_IN_KEY: Value = Value

  /** '=' was seen; waiting for the first character of the value. */
  val VALUE_START: Value = Value

  /** Reading the characters of an unquoted value. */
  val VALUE: Value = Value

  /** A key/value pair has just been recorded. */
  val KV_END: Value = Value

  /** Reading the characters of a double-quoted value. */
  val QUOTED_VALUE: Value = Value

  /** A backslash was seen inside a double-quoted value. */
  val ESCAPING: Value = Value
}
/**
 * Thrown by the parsing state machine when a character violates the expected
 * key/value format.
 *
 * @param msg description of the format violation
 */
class LogFormatErrorException(msg: String) extends Exception(msg)