"""Tiny infix-to-postfix translator: a hand-written lexer plus a
recursive-descent parser.

Grammar handled by the parser (each statement ends with ';'):

    expr   -> term   (('+' | '-') term)*
    term   -> factor (('*' | '/' | div | mod) factor)*
    factor -> '(' expr ')' | number | identifier

Operands and operators are printed one per line in postfix order.  The module
keeps its state in the globals ``buf``, ``lineno``, ``lookahead`` and
``symtable``; importing or running it translates a built-in sample input.
"""

try:
    import StringIO  # Python 2
    _make_buffer = StringIO.StringIO
except ImportError:  # Python 3: the StringIO module moved into io
    import io
    _make_buffer = io.StringIO


class ParseError(Exception):
    """Raised by error() when the input cannot be parsed."""


class Token(object):
    """A lexical token; ``raw`` holds the value the lexer built it from."""

    def __init__(self, raw):
        self.raw = raw

    def __eq__(self, x):
        # Tokens compare by their raw value; anything that is not a Token is
        # simply "not equal" instead of tripping an assertion.
        if not isinstance(x, Token):
            return NotImplemented
        return self.raw == x.raw

    def __ne__(self, x):
        # Python 2 does not derive != from ==, so spell it out.
        result = self.__eq__(x)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Keep hashing consistent with __eq__.
        return hash(self.raw)

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self.raw)


class Num(Token):
    """Integer literal; ``value`` is the numeric value of ``raw``."""

    def __init__(self, raw):
        Token.__init__(self, raw)
        self.value = int(raw)


class Identifier(Token):
    """A name made of letters and digits, starting with a letter."""


class Div(Token):
    """The ``div`` keyword operator."""


class Mod(Token):
    """The ``mod`` keyword operator."""


class Done(Token):
    """End-of-input marker."""


buf = None          # file-like input stream read one character at a time
lineno = None       # current line number, used in error messages
lookahead = None    # the token the parser is currently looking at
symtable = dict()   # lexeme -> token for keywords and identifiers seen so far


def _pushback(ch):
    """Step ``buf`` back one character unless *ch* is the EOF marker ''.

    At EOF nothing was consumed, so seeking back would make the lexer re-read
    the last real character forever.  An absolute seek is used because
    Python 3's io.StringIO rejects non-zero relative seeks.
    """
    if ch != '':
        buf.seek(buf.tell() - 1)


def lexan():
    """Read and return the next token from ``buf``.

    Whitespace is skipped and every newline increments ``lineno``.  Returns a
    Num, an Identifier (or the keyword token registered in ``symtable``), a
    Done at end of input, or a plain Token for any other single character.
    """
    global lineno
    while True:
        t = buf.read(1)
        if t == '':  # EOF
            return Done(t)
        if t == '\n':
            # Must be tested before isspace(): '\n' is whitespace as well.
            lineno = lineno + 1
        elif t.isspace():
            pass  # skip white space
        elif t.isdigit():
            tokenval = int(t)
            t = buf.read(1)
            while t.isdigit():
                tokenval = tokenval * 10 + int(t)
                t = buf.read(1)
            _pushback(t)
            return Num(tokenval)
        elif t.isalpha():
            lexbuf = ''
            while t.isalpha() or t.isdigit():
                lexbuf += t
                t = buf.read(1)
            _pushback(t)
            # Reuse the token already registered for this lexeme so keywords
            # (div, mod) and repeated identifiers map to a single object.
            try:
                i = symtable[lexbuf]
            except KeyError:
                i = Identifier(lexbuf)
                symtable[lexbuf] = i
            return i
        else:
            return Token(t)


def parse():
    """Translate every ';'-terminated expression in ``buf`` until EOF."""
    global lookahead
    lookahead = lexan()
    while not isinstance(lookahead, Done):
        expr()
        match(';')


def expr():
    """Parse ``term (('+' | '-') term)*``, emitting each operator after its
    right-hand operand."""
    term()
    while lookahead.raw in ('+', '-'):
        t = lookahead
        match(lookahead)
        term()
        emit(t)


def term():
    """Parse ``factor (('*' | '/' | div | mod) factor)*``, emitting each
    operator after its right-hand operand."""
    factor()
    while lookahead.raw in ('*', '/') or isinstance(lookahead, (Div, Mod)):
        t = lookahead
        match(lookahead)
        factor()
        emit(t)


def factor():
    """Parse a parenthesised expression, a number or an identifier.

    Raises ParseError (via error()) for any other token.
    """
    if lookahead.raw == '(':
        match('(')
        expr()
        match(')')
    elif isinstance(lookahead, (Num, Identifier)):
        emit(lookahead)
        match(lookahead)
    else:
        error("syntax error")


def match(t):
    """Consume the current token if it matches *t*, then read the next one.

    *t* is either a Token (compared with ==) or a str (compared with the
    current token's ``raw``).  Raises ParseError (via error()) on a mismatch
    or when *t* is of any other type.
    """
    global lookahead
    if isinstance(t, Token):
        if lookahead == t:
            lookahead = lexan()
        else:
            error("syntax error")
    elif isinstance(t, str):
        if lookahead.raw == t:
            lookahead = lexan()
        else:
            error('lookahead is %s, match arg is %s' % (lookahead.raw, t))
    else:
        error("syntax error")


def error(msg):
    """Raise ParseError with *msg* followed by the current line number."""
    # Raising a bare string is itself a TypeError on Python 2.6+ and 3, so a
    # real exception class carries the message instead.
    raise ParseError(msg + ' at %d' % (lineno))


def emit(t):
    """Print one line of output describing token *t*."""
    if isinstance(t, Div):
        print('Div')
    elif isinstance(t, Mod):
        print('Mod')
    elif isinstance(t, Num):
        print('%d' % (t.value))
    elif isinstance(t, Identifier):
        print(t.raw)
    else:
        print("token %s" % (t.raw))


def init():
    """Register the keyword operators in ``symtable``."""
    symtable['div'] = Div('div')
    symtable['mod'] = Mod('mod')


buf = _make_buffer('2+3 * 5;\n 12 div 7 mod 2;')
lineno = 1
init()
parse()
2011年7月6日水曜日
写経 p87
いろいろ間違えた。
登録:
コメントの投稿 (Atom)
0 件のコメント:
コメントを投稿