Scala parser combinator runs out of memory

Posted by user3217013 on Stack Overflow See other posts from Stack Overflow or by user3217013
Published on 2014-06-08T09:07:33Z Indexed on 2014/06/08 9:24 UTC
Read the original article Hit count: 441

Filed under:
|
|

I wrote the following parser in Scala using the parser combinators:

import scala.util.parsing.combinator._

import scala.collection.Map
import scala.io.StdIn

object Keywords {
    val Define = "define"

    val True = "true"
    val False = "false"

    val If = "if"
    val Then = "then"
    val Else = "else"

    val Return = "return"
    val Pass = "pass"

    val Conj = ";"
    val OpenParen = "("
    val CloseParen = ")"
    val OpenBrack = "{"
    val CloseBrack = "}"
    val Comma = ","

    val Plus = "+"
    val Minus = "-"
    val Times = "*"
    val Divide = "/"
    val Pow = "**"

    val And = "&&"
    val Or = "||"
    val Xor = "^^"
    val Not = "!"

    val Equals = "=="
    val NotEquals = "!="

    val Assignment = "="
}

//---------------------------------------------------------------------------------

sealed abstract class Op

case object Plus extends Op 
case object Minus extends Op 
case object Times extends Op 
case object Divide extends Op 
case object Pow extends Op 
case object And extends Op 
case object Or extends Op 
case object Xor extends Op 
case object Not extends Op 
case object Equals extends Op 
case object NotEquals extends Op 
case object Assignment extends Op 

//---------------------------------------------------------------------------------

sealed abstract class Term

case object TrueTerm extends Term
case object FalseTerm extends Term

case class FloatTerm(value : Float) extends Term
case class StringTerm(value : String) extends Term
case class Identifier(name : String) extends Term

//---------------------------------------------------------------------------------

sealed abstract class Expression

case class TermExp(term : Term) extends Expression

case class UnaryOp(op : Op, exp : Expression) extends Expression
case class BinaryOp(op : Op, left : Expression, right : Expression) extends Expression

case class FuncApp(funcName : Term, args : List[Expression]) extends Expression

//---------------------------------------------------------------------------------

sealed abstract class Statement

case class ExpressionStatement(exp : Expression) extends Statement

case class Pass() extends Statement

case class Return(value : Expression) extends Statement

case class AssignmentVar(variable : Term, exp : Expression) extends Statement

case class IfThenElse(testBody : Expression, thenBody : Statement, elseBody : Statement) extends Statement
case class Conjunction(left : Statement, right : Statement) extends Statement

case class AssignmentFunc(functionName : Term, args : List[Term], body : Statement) extends Statement

//---------------------------------------------------------------------------------

class myParser extends JavaTokenParsers {

    val keywordMap : Map[String, Op] = Map( 
        Keywords.Plus -> Plus,
        Keywords.Minus -> Minus,
        Keywords.Times -> Times,
        Keywords.Divide -> Divide,
        Keywords.Pow -> Pow,
        Keywords.And -> And,
        Keywords.Or -> Or,
        Keywords.Xor -> Xor,
        Keywords.Not -> Not,
        Keywords.Equals -> Equals,
        Keywords.NotEquals -> NotEquals,
        Keywords.Assignment -> Assignment 
    )

    def floatTerm : Parser[Term] = decimalNumber ^^ {
        case x => FloatTerm( x.toFloat )
    }

    def stringTerm : Parser[Term] = stringLiteral ^^ {
        case str => StringTerm(str)
    }

    def identifier : Parser[Term] = ident ^^ {
        case value => Identifier(value)
    }

    def boolTerm : Parser[Term] = (Keywords.True | Keywords.False) ^^ {
        case Keywords.True => TrueTerm
        case Keywords.False => FalseTerm
    }

    def simpleTerm : Parser[Expression] = (boolTerm | floatTerm | stringTerm) ^^ {
        case term => TermExp(term)
    }

    def argument = expression

    def arguments_aux : Parser[List[Expression]] = (argument <~ Keywords.Comma) ~ arguments ^^ {
        case arg ~ argList => arg :: argList
    }

    def arguments  = arguments_aux | { argument ^^ { case arg => List(arg) } }

    def funcAppArgs : Parser[List[Expression]] = funcEmptyArgs | ( Keywords.OpenParen ~> arguments <~ Keywords.CloseParen ^^ {
        case args => args.foldRight(List[Expression]()) ( (a,b) => a :: b )
    } )

    def funcApp = identifier ~ funcAppArgs ^^ {
        case funcName ~ argList => FuncApp(funcName, argList)
    }

    def variableTerm : Parser[Expression] = identifier ^^ {
        case name => TermExp(name)
    }

    def atomic_expression = simpleTerm | funcApp | variableTerm

    def paren_expression : Parser[Expression] = Keywords.OpenParen ~> expression <~ Keywords.CloseParen

    def unary_operation : Parser[String] = Keywords.Not

    def unary_expression : Parser[Expression] = operation(0) ~ expression(0) ^^ {
        case op ~ exp => UnaryOp(keywordMap(op), exp)
    }

    def operation(precedence : Int) : Parser[String] = precedence match {
        case 0 => Keywords.Not
        case 1 => Keywords.Pow
        case 2 => Keywords.Times | Keywords.Divide | Keywords.And
        case 3 => Keywords.Plus | Keywords.Minus | Keywords.Or | Keywords.Xor
        case 4 => Keywords.Equals | Keywords.NotEquals
        case _ => throw new Exception("No operations with this precedence.")
    }

    def binary_expression(precedence : Int) : Parser[Expression] = precedence match {
        case 0 => throw new Exception("No operation with zero precedence.")
        case n => (expression (n-1)) ~ operation(n) ~ (expression (n)) ^^ {
            case left ~ op ~ right => BinaryOp(keywordMap(op), left, right)
        }
    }

    def expression(precedence : Int) : Parser[Expression] = precedence match {
        case 0 => unary_expression | paren_expression | atomic_expression
        case n => binary_expression(n) | expression(n-1)
    }

    def expression : Parser[Expression] = expression(4)

    def expressionStmt : Parser[Statement] = expression ^^ { case exp => ExpressionStatement(exp) }

    def assignment : Parser[Statement] = (identifier <~ Keywords.Assignment) ~ expression ^^ {
        case varName ~ exp => AssignmentVar(varName, exp)
    }

    def ifthen : Parser[Statement] = ((Keywords.If ~ Keywords.OpenParen) ~> expression <~ Keywords.CloseParen) ~
        ((Keywords.Then ~ Keywords.OpenBrack) ~> statements <~ Keywords.CloseBrack) ^^ {
            case ifBody ~ thenBody => IfThenElse(ifBody, thenBody, Pass())
        }

    def ifthenelse : Parser[Statement] = ((Keywords.If ~ Keywords.OpenParen) ~> expression <~ Keywords.CloseParen) ~
        ((Keywords.Then ~ Keywords.OpenBrack) ~> statements <~ Keywords.CloseBrack) ~
        ((Keywords.Else ~ Keywords.OpenBrack) ~> statements <~ Keywords.CloseBrack) ^^ {
            case ifBody ~ thenBody ~ elseBody => IfThenElse(ifBody, thenBody, elseBody)
        }

    def pass : Parser[Statement] = Keywords.Pass ^^^ { Pass() }

    def returnStmt : Parser[Statement] = Keywords.Return ~> expression ^^ {
        case exp => Return(exp)
    }

    def statement : Parser[Statement] = ((pass | returnStmt | assignment |  expressionStmt) <~ Keywords.Conj) | ifthenelse | ifthen

    def statements_aux : Parser[Statement] = statement ~ statements ^^ { case st ~ sts => Conjunction(st, sts) } 

    def statements : Parser[Statement] = statements_aux | statement

    def funcDefBody : Parser[Statement] = Keywords.OpenBrack ~> statements <~ Keywords.CloseBrack

    def funcEmptyArgs = Keywords.OpenParen ~ Keywords.CloseParen ^^^ { List() }

    def funcDefArgs : Parser[List[Term]] = funcEmptyArgs | Keywords.OpenParen ~> repsep(identifier, Keywords.Comma) <~ Keywords.CloseParen ^^ {
        case args => args.foldRight(List[Term]()) ( (a,b) => a :: b )
    }

    def funcDef : Parser[Statement] = (Keywords.Define ~> identifier) ~ funcDefArgs ~ funcDefBody ^^ {
        case funcName ~ funcArgs ~ body => AssignmentFunc(funcName, funcArgs, body)
    }

    def funcDefAndStatement : Parser[Statement] = funcDef | statement

    def funcDefAndStatements_aux : Parser[Statement] = funcDefAndStatement ~ funcDefAndStatements ^^ {
        case stmt ~ stmts => Conjunction(stmt, stmts)
    }

    def funcDefAndStatements : Parser[Statement] = funcDefAndStatements_aux | funcDefAndStatement

    def parseProgram : Parser[Statement] = funcDefAndStatements

    def eval(input : String) = {
        parseAll(parseProgram, input) match {
            case Success(result, _) => result
            case Failure(m, _) => println(m)
            case _ => println("")
        }
    }
}

object Parser {
    def main(args : Array[String]) {
        val x : myParser = new myParser()

        println(args(0))

        val lines = scala.io.Source.fromFile(args(0)).mkString

        println(x.eval(lines))
    }
}

The problem is, when I run the parser on the following example it works fine:

define foo(a) {
    if (!h(IM) && a) then {
        return 0;
    }   

    if (a() && !h()) then {
        return 0;
    }    
}

But when I add threes characters in the first if statement, it runs out of memory. This is absolutely blowing my mind. Can anyone help? (I suspect it has to do with repsep, but I am not sure.)

define foo(a) {
    if (!h(IM) && a(1)) then {
        return 0;
    }   

    if (a() && !h()) then {
        return 0;
    }    
}

EDIT: Any constructive comments about my Scala style is also appreciated.

© Stack Overflow or respective owner

Related posts about scala

Related posts about parsing