module Lexer(tokenise, Token(..)) where import Data.Char(isSpace, isAlpha, isDigit) data Token = LNum Integer | LId String | LString String | LPlus | LMinus | LTimes | LParenL | LParenR | LBraceL | LBraceR | LBracketL | LBracketR | LEqual | LEqualEqual | LDot | LDotDot | LComma | LAssign | LUnify | LColon | LSemicolon | LFun | LLet | LLetCC | LCase | LIf | LThen | LElse | LType | LModule | LImport | LObject | LBar | LDArrow | LArrow | LSkip | LPrint | LBreak | LContinue | LReturn | LLabel | LGoto | LWhile | LFor | LChoose | LFail | LTry | LCatch | LThrow deriving Eq instance Show Token where show (LNum n) = show n show (LId x) = x show (LString str) = show str show LPlus = "+" show LMinus = "-" show LTimes = "*" show LParenL = "(" show LParenR = ")" show LBraceL = "{" show LBraceR = "}" show LBracketL = "[" show LBracketR = "]" show LEqual = "=" show LEqualEqual = "==" show LDot = "." show LDotDot = ".." show LComma = "," show LAssign = ":=" show LUnify = ":=:" show LColon = ":" show LSemicolon = ";" show LFun = "fun" show LLet = "let" show LLetCC = "letcc" show LCase = "case" show LIf = "if" show LThen = "then" show LElse = "else" show LType = "type" show LModule = "module" show LImport = "import" show LObject = "object" show LBar = "|" show LDArrow = "=>" show LArrow = "->" show LSkip = "skip" show LPrint = "print" show LBreak = "break" show LContinue = "continue" show LReturn = "return" show LLabel = "label" show LGoto = "goto" show LWhile = "while" show LFor = "for" show LChoose = "choose" show LFail = "fail" show LTry = "try" show LCatch = "catch" show LThrow = "throw" keywords = [ ("fun", LFun), ("let", LLet), ("letcc", LLetCC), ("case", LCase), ("if", LIf), ("then", LThen), ("else", LElse), ("type", LType), ("module", LModule), ("import", LImport), ("object", LObject), ("skip", LSkip), ("print", LPrint), ("break", LBreak), ("continue", LContinue), ("return", LReturn), ("label", LLabel), ("goto", LGoto), ("while", LWhile), ("for", LFor), ("choose", LChoose), ("fail", LFail), ("try", LTry), ("catch", LCatch), ("throw", LThrow) ] tokenise :: String -> [Token] tokenise [] = [] tokenise ('=':'=':cs) = LEqualEqual : tokenise cs tokenise (':':'=':':':cs) = LUnify : tokenise cs tokenise (':':'=':cs) = LAssign : tokenise cs tokenise ('-':'>':cs) = LArrow : tokenise cs tokenise ('=':'>':cs) = LDArrow : tokenise cs tokenise ('.':'.':cs) = LDotDot : tokenise cs tokenise ('+':cs) = LPlus : tokenise cs tokenise ('-':cs) = LMinus : tokenise cs tokenise ('*':cs) = LTimes : tokenise cs tokenise (',':cs) = LComma : tokenise cs tokenise ('(':cs) = LParenL : tokenise cs tokenise (')':cs) = LParenR : tokenise cs tokenise ('{':cs) = LBraceL : tokenise cs tokenise ('}':cs) = LBraceR : tokenise cs tokenise ('[':cs) = LBracketL : tokenise cs tokenise (']':cs) = LBracketR : tokenise cs tokenise ('|':cs) = LBar : tokenise cs tokenise ('=':cs) = LEqual : tokenise cs tokenise (':':cs) = LColon : tokenise cs tokenise ('.':cs) = LDot : tokenise cs tokenise (';':cs) = LSemicolon : tokenise cs tokenise ('"':cs) = let (str, rest) = read_string cs in LString str : tokenise rest tokenise (c:cs) | isSpace c = tokenise cs tokenise (c:cs) | isDigit c = let (str, rest) = span isDigit cs in LNum (read (c:str)) : tokenise rest tokenise (c:cs) | isAlpha c = let (str, rest) = span (\c -> isAlpha c || isDigit c || c == '_') cs in let id = c:str in case lookup id keywords of Just tok -> tok : tokenise rest Nothing -> LId id : tokenise rest tokenise (c:_) = error ("unrecognised symbol " ++ [c]) read_string cs = iter [] cs where iter str [] = error "unterminated string" iter str ('"':cs) = (str, cs) iter str (c:cs) = iter (str ++ [c]) cs