module Parser(parse, PExpr(..), PDecl(..), PPattern(..)) where import Control.Monad.Trans.State import Lexer type Symbol = String data PExpr = PId Symbol | PNum Integer | PPlus PExpr PExpr | PMinus PExpr PExpr | PTimes PExpr PExpr | PEqual PExpr PExpr | PArrow PExpr PExpr | PFun [(Symbol, Maybe PExpr)] PExpr | PApp PExpr [PExpr] | PRecord [(Symbol, PExpr)] | PMember PExpr Symbol | PLetCC Symbol PExpr | PIf PExpr PExpr PExpr | PCase PExpr [(PPattern, PExpr)] | PAssign PExpr PExpr | PSeq PExpr PExpr | PSkip | PUnify PExpr PExpr | PPrint String PExpr | PWhile PExpr PExpr | PFor Symbol PExpr PExpr PExpr | PBreak | PContinue | PReturn PExpr | PLabel Symbol | PGoto Symbol | PTry PExpr Symbol PExpr | PThrow PExpr | PListLit [PExpr] (Maybe PExpr) | PDecl PDecl PExpr | PObject [(Symbol, [(Symbol, Maybe PExpr)], Maybe PExpr, PExpr)] | PChoose [PExpr] | PFail deriving Show data PDecl = PLet Symbol (Maybe PExpr) PExpr | PLetFun Symbol [(Symbol, Maybe PExpr)] (Maybe PExpr) PExpr | PLetUnIni Symbol (Maybe PExpr) | PTypeVar Symbol [Symbol] [(Symbol, [PExpr])] | PTypeRec Symbol [Symbol] [(Symbol, PExpr)] | PTypeDef Symbol [Symbol] PExpr | PModule Symbol [Symbol] [PDecl] | PLetModule Symbol PExpr | PImport PExpr deriving Show data PPattern = PElsePat | PVarPat Symbol | PCtorPat Symbol [Symbol] | PNumPat Integer deriving Show parse :: String -> PExpr parse str = evalState parse' (tokenise str) where parse' = do expr <- parse_expr rest <- get case rest of [] -> do { return expr } _ -> error ("additional tokens after parsing" ++ show rest) -- grammar: -- -- expr0 ::= NUM | ID | ( expr ) | { expr } | [ id = expr, ..., id = expr ] -- | "fun" ( ID , ... , ID ) { expr } -- | "while" expr { expr } | "for" ID = expr "to" expr { expr } -- | "skip" | "break" | "continue" | "label" ID | "goto ID" -- | [ expr , ... , expr ] | [ expr , ... , expr | expr ] -- | "object" { fundef ; ...; fundef ; } | "abort" -- | "letcc" ID { expr5" } -- | "if" expr "then" expr "else" expr5 -- | "case" expr | pattern => expr5 | ... | pattern => expr5 -- | "choose" | expr5 ... | expr5 -- | "try" expr "catch" ID => expr5 | "throw" expr5 -- expr1 ::= expr0 | expr0 ( expr , ... , expr ) | module_expr . ID | expr1 := expr4 -- expr2 ::= expr1 | expr2 * expr1 -- expr3 ::= expr2 | expr3 + expr2 | expr3 - expr2 | expr2 -> expr3 -- expr4 ::= expr3 == expr3 | expr3 :=: expr3 -- expr5 ::= expr4 | "return" expr4 | "print" MSG expr4 -- expr6 ::= expr5 -- | expr; expr -- | decl; expr -- expr ::= decls expr6 -- decl ::= "let" ID = expr3 -- | "let" fundef -- | "type" ID = | variant | ... | variant -- | "type" ID = [ ID = type ,..., ID = type ] -- | "type" ID = type -- | "module" ID ( ID , ... , ID ){ decls } -- | "module" ID = module_expr -- | "import" module_expr -- decls ::= decl; | decl; decls -- -- fundef ::= ID ( ID , ... , ID ) { expr } -- -- module_expr ::= ID | module_expr . ID | module_expr ( module_expr , ... , module_expr ) -- helper functions wrong_token :: String -> Maybe Token -> a wrong_token str Nothing = error "unexpected end-of-file" wrong_token str (Just t) = error ("parse error: I expected to find a " ++ str ++ " here, but got a \"" ++ show t ++ "\" token instead!") peek :: State [Token] (Maybe Token) peek = do tokens <- get case tokens of [] -> return Nothing (t:_) -> return (Just t) get_next :: State [Token] Token get_next = do tokens <- get case tokens of [] -> error "unexpected end-of-file" (t:ts) -> do { put ts; return t } skip :: State [Token] () skip = do tokens <- get case tokens of [] -> wrong_token "" Nothing (_:toks) -> put toks unskip :: Token -> State [Token] () unskip tok = do tokens <- get put (tok:tokens) match :: Token -> State [Token] () match tok = do next <- peek case next of Just t | tok == t -> skip Just t -> error ("I did expect a \"" ++ show tok ++ "\" token here, but did find a \"" ++ show t ++ "\" token instead!") Nothing -> error ("unexpected end-of-file while looking for a \"" ++ show tok ++ "\" token!") if_match :: Token -> State [Token] Bool if_match tok = do next <- peek case next of Just t | tok == t -> do { skip; return True } _ -> return False if_match_one_of :: [Token] -> State [Token] Bool if_match_one_of toks = do next <- peek case next of Just t | elem t toks -> do { skip; return True } _ -> return False parse_list :: State [Token] a -> State [Token] [a] parse_list p = do next <- peek case next of Just LParenR -> return [] Just LBracketR -> return [] Just LBar -> return [] Nothing -> error "unexpected end-of-file" _ -> do iter where iter = do x <- p next <- peek case next of Just LComma -> do { skip; xs <- iter; return (x:xs) } Just LParenR -> do { return [x] } Just LBracketR -> do { return [x] } Just LBar -> do { return [x] } _ -> wrong_token "comma or bracket" next parse_opt_list :: State [Token] a -> State [Token] [a] parse_opt_list p = do found <- if_match LParenL if found then do lst <- parse_list p match LParenR return lst else return [] parse_id :: State [Token] Symbol parse_id = do next <- peek case next of Just (LId x) -> do { skip; return x } _ -> wrong_token "identifier" next parse_num :: State [Token] Integer parse_num = do next <- peek case next of Just (LNum n) -> do { skip; return n } _ -> wrong_token "number" next parse_string :: State [Token] String parse_string = do next <- peek case next of Just (LString s) -> do { skip; return s } _ -> wrong_token "string" next parse_arg :: Token -> State [Token] (Symbol, PExpr) parse_arg sep = do id <- parse_id match sep e <- parse_expr return (id, e) parse_arg_opt :: Token -> State [Token] (Symbol, Maybe PExpr) parse_arg_opt sep = do id <- parse_id found <- if_match sep if found then do e <- parse_expr return (id, Just e) else return (id, Nothing) parse_fun_def :: State [Token] (Symbol, [(Symbol, Maybe PExpr)], Maybe PExpr, PExpr) parse_fun_def = do id <- parse_id match LParenL args <- parse_list (parse_arg_opt LColon) match LParenR found <- if_match LColon typ <- if found then do { e <- parse_expr; return (Just e) } else return Nothing match LBraceL body <- parse_expr match LBraceR return (id, args, typ, body) parse_methods :: State [Token] [(Symbol, [(Symbol, Maybe PExpr)], Maybe PExpr, PExpr)] parse_methods = do next <- peek case next of Just (LId _) -> do m <- parse_fun_def match LSemicolon ms <- parse_methods return (m:ms) _ -> return [] parse_expr0 :: State [Token] PExpr parse_expr0 = do next <- peek case next of Just (LId id) -> do { skip; return (PId id) } Just (LNum n) -> do { skip; return (PNum n) } Just LSkip -> do { skip; return PSkip } Just LBreak -> do { skip; return PBreak } Just LContinue -> do { skip; return PContinue } Just LFail -> do { skip; return PFail } Just LParenL -> do skip found <- if_match LParenR if found then return (PId "()") else do expr <- parse_expr match LParenR return expr Just LBraceL -> do skip expr <- parse_expr match LBraceR return expr Just LFun -> do skip match LParenL args <- parse_list (parse_arg_opt LColon) match LParenR match LBraceL body <- parse_expr match LBraceR return (PFun args body) Just LBracketL -> do skip next <- get_next next2 <- peek case (next, next2) of (LId id, Just LEqual) -> do unskip next fields <- parse_list (parse_arg LEqual) match LBracketR return (PRecord fields) _ -> do unskip next args <- parse_list parse_expr found <- if_match LBar if found then do tail <- parse_expr match LBracketR return (PListLit args (Just tail)) else do match LBracketR return (PListLit args Nothing) Just LWhile -> do skip cond <- parse_expr match LBraceL expr <- parse_expr match LBraceR return (PWhile cond expr) Just LFor -> do skip i <- parse_id match LEqual l <- parse_expr match LDotDot u <- parse_expr match LBraceL expr <- parse_expr match LBraceR return (PFor i l u expr) Just LLabel -> do skip l <- parse_id return (PLabel l) Just LGoto -> do skip l <- parse_id return (PGoto l) Just LLetCC -> do skip k <- parse_id match LBraceL e <- parse_expr match LBraceR return (PLetCC k e) Just LIf -> do skip c <- parse_expr match LThen t <- parse_expr match LElse e <- parse_expr return (PIf c t e) Just LCase -> do skip expr <- parse_expr cases <- parse_cases parse_case return (PCase expr cases) Just LChoose -> do skip cases <- parse_cases parse_expr5 return (PChoose cases) Just LTry -> do skip expr <- parse_expr match LCatch var <- parse_id match LDArrow handler <- parse_expr return (PTry expr var handler) Just LThrow -> do skip expr <- parse_expr return (PThrow expr) Just LObject -> do skip match LBraceL methods <- parse_methods match LBraceR return (PObject methods) _ -> wrong_token "expression" next parse_expr1 :: State [Token] PExpr parse_expr1 = do expr <- parse_expr0 iter expr where iter expr = do next <- peek case next of Just LParenL -> do skip args <- parse_list parse_expr match LParenR iter (PApp expr args) Just LDot -> do skip id <- parse_id iter (PMember expr id) Just LAssign -> do skip e <- parse_expr4 iter (PAssign expr e) _ -> return expr parse_expr2 :: State [Token] PExpr parse_expr2 = do expr <- parse_expr1 iter expr where iter expr = do found <- if_match LTimes if found then do e <- parse_expr1 iter (PTimes expr e) else return expr parse_expr3 :: State [Token] PExpr parse_expr3 = do expr <- parse_expr2 next <- peek case next of Just LPlus -> iter expr Just LMinus -> iter expr Just LArrow -> do skip rest <- parse_expr3 -- "rest" might contain +,- instead of -> return (PArrow expr rest) -- but we don't care _ -> return expr where iter expr = do next <- peek case next of Just LPlus -> do { skip; e <- parse_expr2; iter (PPlus expr e) } Just LMinus -> do { skip; e <- parse_expr2; iter (PMinus expr e) } _ -> do { return expr } parse_expr4 :: State [Token] PExpr parse_expr4 = do expr <- parse_expr3 next <- peek case next of Just LEqualEqual -> do skip e2 <- parse_expr3 return (PEqual expr e2) Just LUnify -> do skip e2 <- parse_expr3 return (PUnify expr e2) _ -> return expr parse_expr5 :: State [Token] PExpr parse_expr5 = do next <- peek case next of Just LReturn -> do skip expr <- parse_expr4 return (PReturn expr) Just LPrint -> do skip str <- parse_string expr <- parse_expr4 return (PPrint str expr) _ -> parse_expr4 parse_expr6 = do e <- parse_expr5 parse_rest e where parse_rest e = do found <- if_match LSemicolon if found then do next <- peek case next of Just LBraceR -> return e -- allow semicolon in front of closing brace _ -> do es <- parse_expr return (PSeq e es) else return e parse_expr :: State [Token] PExpr parse_expr = do decls <- parse_decls expr <- parse_expr6 return (foldr (\d e -> PDecl d e) expr decls) parse_variants :: State [Token] [(Symbol, [PExpr])] parse_variants = do found <- if_match LBar if found then do ctor <- parse_id args <- parse_opt_list parse_expr rest <- parse_variants return ((ctor, args) : rest) else return [] parse_cases :: State [Token] a -> State [Token] [a] parse_cases p = do found <- if_match LBar if found then do x <- p rest <- parse_cases p return (x:rest) else return [] parse_case :: State [Token] (PPattern, PExpr) parse_case = do pat <- parse_pattern match LDArrow expr <- parse_expr return (pat, expr) parse_pattern = do next <- peek case next of Just LElse -> do { skip; return PElsePat } Just (LNum n) -> do { skip; return (PNumPat n) } Just (LId x) -> do skip args <- parse_opt_list parse_id case args of [] -> return (PVarPat x) _ -> return (PCtorPat x args) Just LBracketL -> do skip found <- if_match LBracketR if found then return (PVarPat "%Nil") else do h <- parse_id match LBar t <- parse_id match LBracketR return (PCtorPat "Cons" [h,t]) parse_decls = do next <- peek case next of Just LLet -> do skip id <- parse_id next <- peek case next of Just LColon -> do skip t <- parse_expr found <- if_match LEqual if found then do e <- parse_expr3 match LSemicolon rest <- parse_decls return (PLet id (Just t) e : rest) else do match LSemicolon rest <- parse_decls return (PLetUnIni id (Just t) : rest) Just LEqual -> do skip expr <- parse_expr3 match LSemicolon rest <- parse_decls return (PLet id Nothing expr : rest) Just LSemicolon -> do skip rest <- parse_decls return (PLetUnIni id Nothing : rest) Just LParenL -> do unskip (LId id) (id, args, typ, body) <- parse_fun_def match LSemicolon rest <- parse_decls return (PLetFun id args typ body : rest) _ -> wrong_token "variable definition" next Just LType -> do skip name <- parse_id args <- parse_opt_list parse_id match LEqual next <- peek case next of Just LBar -> do vars <- parse_variants match LSemicolon rest <- parse_decls return (PTypeVar name args vars : rest) Just LBracketL -> do skip fields <- parse_list (parse_arg LColon) match LBracketR match LSemicolon rest <- parse_decls return (PTypeRec name args fields : rest) _ -> do t <- parse_expr5 match LSemicolon rest <- parse_decls return (PTypeDef name args t : rest) Just LModule -> do skip name <- parse_id found <- if_match LEqual if found then do expr <- parse_expr match LSemicolon rest <- parse_decls return (PLetModule name expr : rest) else do args <- parse_opt_list parse_id match LBraceL decls <- parse_decls match LBraceR match LSemicolon rest <- parse_decls return (PModule name args decls : rest) Just LImport -> do skip mod <- parse_expr1 match LSemicolon rest <- parse_decls return (PImport mod : rest) _ -> return []