# It formats the output as HTML, with fragment (anchor) links for jump and call instructions.
import pprint
import re
import elementtree.ElementTree as ET
# Banner line that marks where the assembly listing starts in the input file.
ASSEMBLY_START = '+++++++++++++++++++ ASSEMBLY CODE LISTING +++++++++++++++++++'
# Line prefixes that open a new procedure / a new fragment in the listing.
PROC_BOUNDARY = '========='
FRAG_BOUNDARY = '---------'
# One instruction line of the listing:
#   ":<8-char address> <hex bytes><spaces><mnemonic>[ <operand>]"
# The group names are the keys Parser.parse_assembly reads from groupdict().
# They had been lost from the pattern (leaving "(?P[", which re rejects), so
# they are restored here.
# NOTE: inside the operand character class "+-<" is a range ('+' through '<');
# it is kept exactly as written because it also admits '.', '/', ':' and ';'.
ASM_FORMAT = (r''':(?P<line_addr>[0-9A-Z]{8}) '''
              r'''(?P<raw_binary>([0-9A-Z]{2})+)[ ]+'''
              r'''(?P<operator>[a-z]+)'''
              r'''( (?P<operand>([a-zA-Z0-9,+-<> ]|\[|\])+))?'''
              )
asm_line = re.compile(ASM_FORMAT)
# Banner line that marks the start of the cross-reference section; the parser
# switches to its (currently empty) reference handler when it sees this line.
CROSS_REFERENCE_START = '*************** Cross Reference Listing ****************'
# Patterns presumably meant for fragment, procedure and continuation lines of
# the cross-reference section -- nothing in the visible code compiles or uses
# them yet.
# NOTE(review): "(?P[" and "(?P " are not valid named-group syntax; the
# "<name>" parts appear to have been stripped, and the intended names cannot
# be recovered from this file. re.compile() would reject these as written --
# restore the names (each distinct within one pattern) before using them.
FRAG_LINE = r'--(?P[0-9A-Z]{8})::((?P [0-9A-Z]{8}),)+'
PROC_LINE = r'==(?P[0-9A-Z]{8})::((?P [0-9A-Z]{8}),)+'
CONT_LINE = r'[ ]+((?P[0-9A-Z]{8}),)+'
class Parser(object):
    '''Parse a disassembly listing into an ElementTree.

    The result is kept in ``self.assembly`` and has the shape
    ``program > procedure > fragment > op``.  A placeholder procedure and
    fragment (both with ``addr`` '0x0') exist from construction, so ops seen
    before the first boundary line still have somewhere to go.

    Each ``op`` element carries the attributes ``addr``, ``raw`` and
    ``operator``; ``operand`` is present only when the instruction has one.

    Usage sketch::

        asm = open('hoge.asm', 'r')
        p = Parser()
        p.parse(asm)
        for proc in p.assembly:
            print(proc.get('addr'))
        for op in p.assembly[14][3]:
            print(op.get('addr'), op.get('raw'),
                  op.get('operator'), op.get('operand'))
    '''

    def __init__(self):
        # The handler for the current section of the input; it is swapped as
        # section banners are encountered.
        self.current_parse = self.parse_something
        self.prev_line = ''
        self.assembly = ET.Element('program')
        self.proc = ET.SubElement(self.assembly, 'procedure')
        self.proc.attrib['addr'] = '0x0'
        self.frag = ET.SubElement(self.proc, 'fragment')
        self.frag.attrib['addr'] = '0x0'

    def parse(self, f):
        '''Feed every line of the iterable *f* to the current section handler.

        On any exception the position and parser state are dumped to stdout
        and the exception is re-raised with its original traceback.
        '''
        # Pre-bind so the diagnostics below cannot fail with a NameError when
        # the iterable itself raises before yielding a first line.
        i, line = -1, ''
        try:
            for i, line in enumerate(f):
                self.current_parse(line)
                self.prev_line = line
        except Exception:
            print('something wrong around line %d' % (i + 1))
            pprint.pprint((i, self.prev_line))
            pprint.pprint((i + 1, line))
            pprint.pprint(self.assembly)
            pprint.pprint(self.proc)
            pprint.pprint(self.frag)
            # Bare raise keeps the original traceback.
            raise

    def parse_assembly(self, line):
        '''Handle one line of the assembly listing section.'''
        if line.startswith(CROSS_REFERENCE_START):
            print('CROSS_REFERENCE_START')
            self.current_parse = self.parse_reference
            return
        if line.startswith(PROC_BOUNDARY):
            # A new procedure always starts with a fresh fragment.
            self.proc = ET.SubElement(self.assembly, 'procedure')
            self.frag = ET.SubElement(self.proc, 'fragment')
            return
        if line.startswith(FRAG_BOUNDARY):
            self.frag = ET.SubElement(self.proc, 'fragment')
            return

        # The first instruction after a boundary line gives the freshly
        # opened procedure / fragment its address.
        after_proc = self.prev_line.startswith(PROC_BOUNDARY)
        after_frag = self.prev_line.startswith(FRAG_BOUNDARY)
        for matchobj in asm_line.finditer(line):
            fields = matchobj.groupdict()
            addr = fields['line_addr']
            if after_proc:
                self.proc.attrib['addr'] = addr
            if after_proc or after_frag:
                self.frag.attrib['addr'] = addr
            op = ET.SubElement(self.frag, 'op')
            op.attrib['addr'] = addr
            # groupdict() reports unmatched optional groups as None; leave
            # those attributes out instead of storing None.
            for attr_name, group_name in (('raw', 'raw_binary'),
                                          ('operator', 'operator'),
                                          ('operand', 'operand')):
                value = fields.get(group_name)
                if value is not None:
                    op.attrib[attr_name] = value

    def parse_reference(self, line):
        '''Handle one line of the cross-reference section (not implemented).'''
        pass

    def parse_something(self, line):
        '''Skip lines until the assembly listing banner is found.'''
        if line.startswith(ASSEMBLY_START):
            print('ASSEMBLY_START')
            self.current_parse = self.parse_assembly
class DocTree(object):
    '''Skeleton HTML document: <html> with a <head> (holding a title) and a <body>.

    ``root``, ``head`` and ``body`` are the corresponding elements.
    '''

    def __init__(self):
        self.root = ET.Element('html')
        self.head = ET.SubElement(self.root, 'head')
        title = ET.SubElement(self.head, 'title')
        title.text = 'formatted asm result'
        self.body = ET.SubElement(self.root, 'body')

    def setcss(self, css):
        '''Parse *css* as a single XML element and append it to <head>.

        *css* is expected to be markup such as ``<style>...</style>``.  A
        ``None`` or whitespace-only value is ignored, because there is no
        element to parse and ET.fromstring would raise on it.
        '''
        if css is None or not css.strip():
            return
        e = ET.fromstring(css)
        print('using css')
        print('-' * 20)
        print(e)
        print(e.text)
        print('-' * 20)
        self.head.append(e)

    def html(self):
        '''Return the document wrapped in an ElementTree, ready for write().'''
        return ET.ElementTree(self.root)
# Stylesheet appended to the generated page's <head>.  It must be exactly one
# well-formed XML element, because DocTree.setcss parses it with
# ET.fromstring.  The original markup was lost from this file (only a blank
# string remained, which cannot be parsed); the rules below are a minimal
# stand-in for the three div classes the HTML visitor emits.
css = '''
<style type="text/css">
div.procedure { margin: 1em 0; border-top: 1px solid #888888; }
div.fragment { margin: 0.5em 0 0.5em 1em; }
div.op { margin-left: 1em; font-family: monospace; white-space: pre; }
</style>
'''
class MakeHTMLVisit(object):
    '''Visitor that mirrors a parsed program tree as nested HTML <div>s.

    Every source element becomes one <div> under the <div> of its parent.
    ``procedure`` and ``fragment`` divs get a heading text; ``op`` divs get
    ``id`` set to the instruction address, and jump / call instructions are
    rendered as <a href="#target"> links so they can be followed in a browser.
    '''

    def __init__(self):
        self.doc = DocTree()
        self.doc.setcss(css)
        # Output element that the next visited source element is added to.
        self.current = self.doc.body

    def accept(self, e):
        '''Emit a <div> for *e* under the current output node, then recurse.'''
        parent = self.current
        self.current = self.SubElement(parent, e)
        for c in e:
            self.accept(c)
        self.current = parent

    def SubElement(self, doc_parent, src_element):
        '''Create, decorate and return the <div> for *src_element*.

        If decoration fails, the offending source element is dumped to
        stdout and the exception is re-raised with its original traceback.
        '''
        assert ET.iselement(doc_parent)
        assert ET.iselement(src_element)
        e = ET.SubElement(doc_parent, 'div')
        try:
            self.decoreate(e, src_element)
        except Exception:
            pprint.pprint(src_element)
            raise
        return e

    def decoreate(self, e, src_element):
        '''Fill in class, id and text (or link) of *e* from *src_element*.

        Elements with a tag other than procedure / fragment / op are left as
        an empty <div>.
        '''
        addr = src_element.get('addr')
        tag = src_element.tag
        if tag == 'procedure':
            e.attrib['class'] = 'procedure'
            e.text = 'proc: %s\n' % (addr)
        elif tag == 'fragment':
            e.attrib['class'] = 'fragment'
            e.text = 'frag: %s\n' % (addr)
        elif tag == 'op':
            if addr is not None:
                # A None attribute value cannot be serialized.
                e.attrib['id'] = addr
            e.attrib['class'] = 'op'
            operator = src_element.get('operator') or ''
            operand = src_element.get('operand')
            if operand is None:
                # Instructions such as "ret" have no operand; do not print "None".
                label = '%s\n' % (operator)
            else:
                label = '%s %s\n' % (operator, operand)
            is_branch = operator.startswith('j') or operator.startswith('call')
            if is_branch and operand is not None:
                a = ET.SubElement(e, 'a', href='#%s' % (self._link_target(operand)))
                a.text = label
            else:
                e.text = label

    def _link_target(self, operand):
        '''Return the fragment id a jump / call operand should link to.'''
        if operand.startswith('dword'):
            # "dword ptr [00402000]" -> "00402000": take the last token and
            # drop both surrounding brackets.
            return operand.split()[-1].strip('[]')
        return operand.strip()
# Script body: parse hoge.asm and write the linked HTML rendering to hoge.html.
# open() replaces the Python-2-only file() builtin, and the with-block closes
# the input as soon as parsing is finished.
with open('hoge.asm', 'r') as asm:
    p = Parser()
    p.parse(asm)
visit = MakeHTMLVisit()
visit.accept(p.assembly)
html = visit.doc.html()
html.write("hoge.html")
# (blog page footer, not part of the script) 0 comments:
# (blog page footer, not part of the script) Post a comment