2008年11月1日土曜日

Parser for disassembler output

このエントリーをブックマークに追加 このエントリーを含むはてなブックマーク
Use it as you like; it is released under the BSD License.
It formats the output as HTML, with fragment links (`#address` anchors) for the targets of jump and call instructions.

import pprint
import re

try:
    # Standard-library location of ElementTree (Python 2.5+ and Python 3).
    import xml.etree.ElementTree as ET
except ImportError:
    # Legacy stand-alone package used by the original script.
    import elementtree.ElementTree as ET

# Marker line after which Parser switches to reading the assembly listing.
ASSEMBLY_START = '+++++++++++++++++++ ASSEMBLY CODE LISTING +++++++++++++++++++'
# Prefixes of the separator lines at which Parser opens a new procedure /
# a new fragment.
PROC_BOUNDARY = '========='
FRAG_BOUNDARY = '---------'
# One instruction line: ":<8-char address> <raw bytes> <mnemonic>[ <operand>]".
# The group names are the keys Parser.parse_assembly reads from the match.
# NOTE(review): inside the operand character class, "+-<" is a *range*
# ('+' through '<'), so besides '+', '-' and '<' it also admits '.', '/',
# ':' and ';'.  It is kept exactly as it was; confirm that this is intended.
ASM_FORMAT = (
    r''':(?P<line_addr>[0-9A-Z]{8}) '''
    r'''(?P<raw_binary>([0-9A-Z]{2})+)[ ]+'''
    r'''(?P<operator>[a-z]+)'''
    r'''( (?P<operand>([a-zA-Z0-9,+-<> ]|\[|\])+))?'''
)
asm_line = re.compile(ASM_FORMAT)

# Marker line after which Parser switches to its cross-reference handler.
CROSS_REFERENCE_START = '*************** Cross Reference Listing ****************'
# Patterns for the cross-reference section.  Nothing in this file compiles or
# uses them yet (Parser.parse_reference is a stub).
# NOTE(review): the named groups had lost their names, which made the patterns
# invalid ("(?P[" is not legal regex syntax).  'addr' and 'ref' are placeholder
# names -- the original ones could not be recovered; rename as needed.
# Because the 'ref' group is repeated, a match only keeps its last occurrence.
FRAG_LINE = r'--(?P<addr>[0-9A-Z]{8})::((?P<ref>[0-9A-Z]{8}),)+'
PROC_LINE = r'==(?P<addr>[0-9A-Z]{8})::((?P<ref>[0-9A-Z]{8}),)+'
CONT_LINE = r'[ ]+((?P<ref>[0-9A-Z]{8}),)+'


class Parser(object):
    '''Line-oriented parser that turns a disassembler dump into an element tree.

    After ``parse()``, ``self.assembly`` is a ``program`` element containing
    ``procedure`` elements, which contain ``fragment`` elements, which contain
    one ``op`` element per instruction line.  Procedures and fragments carry an
    ``addr`` attribute; every ``op`` carries ``addr``, ``raw``, ``operator``
    and ``operand`` (``operand`` is ``''`` when the instruction has none).

    Example::

        with open('hoge.asm', 'r') as asm:
            p = Parser()
            p.parse(asm)
        for n in p.assembly:
            print('%s %s' % (n, n.attrib['addr']))
        for op in p.assembly[14][3]:
            print('%s %s %s %s' % (op.attrib['addr'], op.attrib['raw'],
                                   op.attrib['operator'],
                                   op.attrib['operand']))
    '''

    def __init__(self):
        # ``current_parse`` is the handler for the section currently being
        # read; it is swapped when a section marker line is seen.
        self.current_parse = self.parse_something
        self.prev_line = ''
        self.assembly = ET.Element('program')
        # Initial procedure/fragment: receives any instruction that appears
        # before the first boundary line.
        self.proc = ET.SubElement(self.assembly, 'procedure')
        self.proc.attrib['addr'] = '0x0'
        self.frag = ET.SubElement(self.proc, 'fragment')
        self.frag.attrib['addr'] = '0x0'

    def parse(self, f):
        '''Feed every line of the iterable ``f`` to the current handler.

        On any exception the position and the parser state are dumped to
        stdout, then the exception is re-raised unchanged.
        '''
        # Pre-bound so the diagnostics below cannot fail with a NameError when
        # the very first iteration raises.
        i = -1
        line = ''
        try:
            for i, line in enumerate(f):
                self.current_parse(line)
                self.prev_line = line
        except Exception:
            print('something wrong around line %d' % (i + 1))
            pprint.pprint((i, self.prev_line))
            pprint.pprint((i + 1, line))
            pprint.pprint(self.assembly)
            pprint.pprint(self.proc)
            pprint.pprint(self.frag)
            # Bare raise keeps the original traceback.
            raise

    def parse_assembly(self, line):
        '''Handle one line of the assembly listing section.'''
        if line.startswith(CROSS_REFERENCE_START):
            print('CROSS_REFERENCE_START')
            self.current_parse = self.parse_reference
            return
        if line.startswith(PROC_BOUNDARY):
            # A procedure always starts with a fresh fragment of its own.
            self.proc = ET.SubElement(self.assembly, 'procedure')
            self.frag = ET.SubElement(self.proc, 'fragment')
            return
        if line.startswith(FRAG_BOUNDARY):
            self.frag = ET.SubElement(self.proc, 'fragment')
            return

        for matchobj in asm_line.finditer(line):
            addr = matchobj.group('line_addr')
            # The first instruction seen in a procedure/fragment supplies its
            # address.  This no longer depends on the boundary line being
            # immediately before the instruction line.
            self.proc.attrib.setdefault('addr', addr)
            self.frag.attrib.setdefault('addr', addr)

            op = ET.SubElement(self.frag, 'op')
            op.attrib['addr'] = addr
            op.attrib['raw'] = matchobj.group('raw_binary')
            op.attrib['operator'] = matchobj.group('operator')
            # The operand group is optional and is None when absent; store ''
            # so attribute values are always strings.
            op.attrib['operand'] = matchobj.group('operand') or ''

    def parse_reference(self, line):
        '''Handle one line of the cross-reference section (not implemented).'''
        pass

    def parse_something(self, line):
        '''Skip lines until the assembly listing marker is reached.'''
        if line.startswith(ASSEMBLY_START):
            print('ASSEMBLY_START')
            self.current_parse = self.parse_assembly

class DocTree(object):
    '''Skeleton HTML document: an ``html`` root with ``head`` and ``body``.

    ``head`` starts out with a ``title`` element; ``body`` starts out empty.
    '''

    def __init__(self):
        self.root = ET.Element('html')
        self.head = ET.SubElement(self.root, 'head')
        title = ET.SubElement(self.head, 'title')
        title.text = 'formatted asm result'
        self.body = ET.SubElement(self.root, 'body')

    def setcss(self, css):
        '''Parse ``css`` as one XML element and append it to ``head``.

        ``css`` must be the markup of a single well-formed element, because it
        is parsed with ``ET.fromstring``.  A blank or empty string is ignored
        instead of raising a parse error.  The parsed element and its text are
        echoed to stdout.
        '''
        if not css or not css.strip():
            return
        e = ET.fromstring(css)
        print('using css')
        print('-' * 20)
        print(e)
        print(e.text)
        print('-' * 20)
        self.head.append(e)

    def html(self):
        '''Return the document wrapped in an ``ElementTree``.'''
        return ET.ElementTree(self.root)

# Stylesheet markup passed to DocTree.setcss(), which parses it with
# ET.fromstring() and therefore needs exactly one well-formed element.
# NOTE(review): the literal was blank, which ET.fromstring() rejects with a
# parse error.  This empty <style> element is a placeholder only -- the
# original rules could not be recovered; add the real ones here.
css = '''<style type="text/css">
</style>
'''
class MakeHTMLVisit(object):
    '''Visitor that mirrors a source element tree as nested HTML ``div``s.

    Every source element becomes one ``div`` under the current insertion
    point.  ``op`` elements get their address as the ``id`` attribute, and
    jump/call instructions are rendered as ``a`` links to ``#<target>``.
    '''

    def __init__(self):
        self.doc = DocTree()
        self.doc.setcss(css)
        # Insertion point: the HTML element that receives the next ``div``.
        self.current = self.doc.body

    def accept(self, e):
        '''Render ``e`` and, recursively, its children under ``self.current``.'''
        parent = self.current
        self.current = self.SubElement(parent, e)
        for c in e:
            self.accept(c)
        # Back to the level we came from, so siblings of ``e`` land next to it.
        self.current = parent

    def SubElement(self, doc_parent, src_element):
        '''Append a decorated ``div`` for ``src_element`` to ``doc_parent``.

        Returns the new ``div``.  If decorating fails, the source element is
        dumped to stdout and the exception is re-raised unchanged.
        '''
        # iselement() is False for None too, so no separate None checks.
        assert ET.iselement(doc_parent)
        assert ET.iselement(src_element)
        e = ET.SubElement(doc_parent, 'div')
        try:
            self.decoreate(e, src_element)
        except Exception:
            pprint.pprint(src_element)
            # Bare raise keeps the original traceback.
            raise
        return e

    @staticmethod
    def _link_target(operand):
        '''Return the fragment id that a jump/call operand should link to.

        For operands starting with ``dword`` (e.g. ``dword ptr [00402000]``)
        this is the last token with its square brackets removed; any other
        operand is used as is.
        '''
        if operand.startswith('dword'):
            # strip('[]') removes both brackets; slicing off only the last
            # character used to leave the leading '[' in the link.
            return operand.split()[-1].strip('[]')
        return operand

    def decoreate(self, e, src_element):
        '''Fill the HTML element ``e`` according to the tag of ``src_element``.

        ``procedure`` and ``fragment`` get a class and a caption; ``op`` gets
        an id, a class and the instruction text.  Other tags leave ``e`` bare.
        '''
        addr = src_element.get('addr')
        tag = src_element.tag
        if tag == 'procedure':
            e.attrib['class'] = 'procedure'
            e.text = 'proc: %s\n' % (addr)
        elif tag == 'fragment':
            e.attrib['class'] = 'fragment'
            e.text = 'frag: %s\n' % (addr)
        elif tag == 'op':
            if addr is not None:
                e.attrib['id'] = addr
            e.attrib['class'] = 'op'
            # Missing attributes are treated as empty, so an instruction
            # without an operand is not rendered with the text "None".
            operator = src_element.get('operator') or ''
            operand = src_element.get('operand') or ''
            if operand:
                label = '%s %s\n' % (operator, operand)
            else:
                label = '%s\n' % (operator)
            if 'j' in operator or operator.startswith('call'):
                href = '#%s' % (self._link_target(operand))
                a = ET.SubElement(e, 'a', href=href)
                a.text = label
            else:
                e.text = label

# Script part: parse hoge.asm and write its HTML rendering to hoge.html.
# open() replaces the Python 2-only file() builtin, and the with-block closes
# the input as soon as parsing is done.
with open('hoge.asm', 'r') as asm:
    p = Parser()
    p.parse(asm)
visit = MakeHTMLVisit()
visit.accept(p.assembly)
html = visit.doc.html()
html.write("hoge.html")

0 件のコメント: