Skip to content

Commit b701560

Browse files
committed
Merge branch 'python3'
2 parents a65f257 + cca01cd commit b701560

File tree

1 file changed

+198
-0
lines changed

1 file changed

+198
-0
lines changed

pythonparser3

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
#!/usr/bin/env python3
2+
3+
import sys
4+
import json as json
5+
import ast
6+
import asttokens
7+
from xml.sax.saxutils import quoteattr
8+
import argparse
9+
10+
def read_file_to_string(filename):
    """Return the entire contents of *filename* as text.

    The file is opened in text mode with the platform default encoding, so
    decoding problems surface as ``UnicodeDecodeError``.
    """
    # The context manager closes the handle even when read() raises,
    # which the previous open/read/close sequence did not.
    with open(filename, 'rt') as f:
        return f.read()
15+
16+
17+
def parse_file(filename):
    """Parse a Python 3 source file into a flat, JSON-like list of AST nodes.

    The file is parsed through ``asttokens`` so that every node carries its
    first and last token.  Each entry of the returned list is a dict with:

    * ``type`` -- the AST class name, or a synthetic label such as ``body``,
      ``orelse`` or ``attr``.  In the default handling, operators and
      expression contexts are appended to the parent's type name instead of
      becoming children (e.g. ``NameLoad``, ``BinOpAdd``).
    * ``value`` -- the identifier or literal carried by the node, if any.
    * ``lineno`` / ``col`` / ``end_line_no`` / ``end_col`` -- token
      positions, stored as strings.
    * ``children`` -- indices into the same list; present only when the
      node has at least one child.

    The root of the tree is the entry at index 0.
    """
    tree = asttokens.ASTTokens(read_file_to_string(filename), parse=True).tree

    json_tree = []

    # Literal node classes used by parsers older than Python 3.8, mapped to
    # the attribute holding the literal.  They are only looked up on those
    # interpreters: the names are deprecated and removed in Python 3.14,
    # where merely referencing them raises AttributeError.
    legacy_literals = {}
    if sys.version_info < (3, 8):
        legacy_literals = {ast.NameConstant: 'value', ast.Num: 'n', ast.Str: 's'}

    def add_node(node_type):
        """Append a fresh node of *node_type*; return (index, node dict)."""
        pos = len(json_tree)
        json_node = {'type': node_type}
        json_tree.append(json_node)
        return pos, json_node

    def localize(node, json_node):
        """Copy the token span of *node* into *json_node* as strings."""
        json_node['lineno'] = str(node.first_token.start[0])
        json_node['col'] = str(node.first_token.start[1])
        json_node['end_line_no'] = str(node.last_token.end[0])
        json_node['end_col'] = str(node.last_token.end[1])

    def gen_identifier(identifier, node_type='identifier', node=None):
        """Emit a leaf node holding *identifier*, positioned at *node*."""
        pos, json_node = add_node(node_type)
        json_node['value'] = identifier
        localize(node, json_node)
        return pos

    def traverse_list(l, node_type='list', node=None):
        """Emit a synthetic *node_type* node whose children are the items of *l*."""
        pos, json_node = add_node(node_type)
        localize(node, json_node)
        children = [traverse(item) for item in l]
        if children:
            json_node['children'] = children
        return pos

    def traverse(node):
        """Emit *node* and, recursively, its descendants; return its index."""
        pos, json_node = add_node(type(node).__name__)
        localize(node, json_node)
        children = []

        # Attach the identifier or literal carried by the node itself.
        if isinstance(node, ast.Name):
            json_node['value'] = node.id
        elif isinstance(node, ast.Constant):
            json_node['value'] = node.value
        elif type(node) in legacy_literals:
            json_node['value'] = getattr(node, legacy_literals[type(node)])
        elif isinstance(node, ast.alias):
            json_node['value'] = node.name
            if node.asname:
                children.append(gen_identifier(node.asname, node=node))
        elif isinstance(node, (ast.FunctionDef, ast.ClassDef)):
            json_node['value'] = node.name
        elif isinstance(node, ast.ExceptHandler):
            if node.name:
                json_node['value'] = node.name
        elif isinstance(node, ast.ImportFrom):
            if node.module:
                json_node['value'] = node.module
        elif isinstance(node, ast.Global):
            for name in node.names:
                children.append(gen_identifier(name, node=node))
        elif isinstance(node, (ast.keyword, ast.arg)):
            json_node['value'] = node.arg

        # Process children.
        if isinstance(node, ast.For):
            children.append(traverse(node.target))
            children.append(traverse(node.iter))
            children.append(traverse_list(node.body, 'body', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
        elif isinstance(node, (ast.If, ast.While)):
            children.append(traverse(node.test))
            children.append(traverse_list(node.body, 'body', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
        elif isinstance(node, ast.With):
            children.append(traverse_list(node.items, 'items', node))
            children.append(traverse_list(node.body, 'body', node))
        elif isinstance(node, ast.withitem):
            children.append(traverse(node.context_expr))
            if node.optional_vars:
                children.append(traverse(node.optional_vars))
        elif isinstance(node, ast.Try):
            children.append(traverse_list(node.body, 'body', node))
            children.append(traverse_list(node.handlers, 'handlers', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
            if node.finalbody:
                children.append(traverse_list(node.finalbody, 'finalbody', node))
        elif isinstance(node, ast.arguments):
            children.append(traverse_list(node.args, 'args', node))
            children.append(traverse_list(node.defaults, 'defaults', node))
            # NOTE(review): keyword-only arguments are emitted under the
            # 'defaults' label; kept unchanged so the output format stays
            # compatible with existing consumers.
            children.append(traverse_list(node.kwonlyargs, 'defaults', node))
            # kw_defaults holds None for keyword-only arguments that have no
            # default; those placeholders are not AST nodes and cannot be
            # traversed, so they are skipped.
            kw_defaults = [d for d in node.kw_defaults if d is not None]
            children.append(traverse_list(kw_defaults, 'defaults', node))
            if node.vararg:
                children.append(gen_identifier(node.vararg.arg, 'vararg', node.vararg))
            if node.kwarg:
                children.append(gen_identifier(node.kwarg.arg, 'kwarg', node.kwarg))
        elif isinstance(node, ast.ExceptHandler):
            if node.type:
                children.append(traverse_list([node.type], 'type', node))
            children.append(traverse_list(node.body, 'body', node))
        elif isinstance(node, ast.ClassDef):
            children.append(traverse_list(node.bases, 'bases', node))
            children.append(traverse_list(node.body, 'body', node))
            children.append(traverse_list(node.decorator_list, 'decorator_list', node))
        elif isinstance(node, ast.FunctionDef):
            children.append(traverse(node.args))
            children.append(traverse_list(node.body, 'body', node))
            children.append(traverse_list(node.decorator_list, 'decorator_list', node))
        else:
            # Default handling: iterate over children.
            folded = (ast.expr_context, ast.operator, ast.boolop,
                      ast.unaryop, ast.cmpop)
            for child in ast.iter_child_nodes(node):
                if isinstance(child, folded):
                    # Directly include expr_context, and operators into the
                    # type instead of creating a child.
                    json_node['type'] += type(child).__name__
                else:
                    children.append(traverse(child))

        if isinstance(node, ast.Attribute):
            children.append(gen_identifier(node.attr, 'attr', node))

        if children:
            json_node['children'] = children
        return pos

    traverse(tree)
    return json_tree
155+
156+
157+
def json2xml(tree):
    """Render the flat node list *tree* as an indented XML-like string.

    Rendering starts at index 0.  Each node becomes an opening tag named
    after its ``type`` (with ``value`` and the position fields as quoted
    attributes, when present), followed by its children one tab deeper,
    followed by the matching closing tag.  Lines are joined with newlines.
    """
    attribute_order = ('value', 'lineno', 'col', 'end_line_no', 'end_col')
    output = []

    def emit(index, depth):
        entry = tree[index]
        pad = "\t" * depth
        attributes = "".join(
            ' {}={}'.format(name, quoteattr(str(entry[name])))
            for name in attribute_order
            if name in entry
        )
        output.append(pad + "<{}".format(entry['type']) + attributes + ">")
        if "children" in entry:
            # Children are stored as indices into the same list.
            for child in entry["children"]:
                emit(int(child), depth + 1)
        output.append(pad + "</" + entry["type"] + ">")

    emit(0, 0)
    return "\n".join(output)
174+
175+
176+
def parse(filename):
    """Return the XML rendering of *filename*'s AST, or ``None``.

    ``None`` is returned when reading or converting the file raises
    ``UnicodeEncodeError`` or ``UnicodeDecodeError``; any other exception
    propagates to the caller.
    """
    try:
        return json2xml(parse_file(filename))
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Best effort: files with encoding problems yield no result.
        return None
183+
184+
185+
if __name__ == "__main__":
    # Command-line entry point: parse the given file and print its tree as
    # XML (unless suppressed) and/or as JSON.
    parser = argparse.ArgumentParser(description='Parse python3 file')
    parser.add_argument('filename', type=str, help='Filename')
    parser.add_argument('--noprintxml', action="store_true",
                        help='Do not print the XML tree')
    parser.add_argument('--printjson', action="store_true",
                        help='Print the node list as JSON')

    args = parser.parse_args()
    json_tree = parse_file(args.filename)
    xml = json2xml(json_tree)

    if not args.noprintxml:
        print(xml)
    if args.printjson:
        # Emit real JSON rather than the Python repr of the list.  Literal
        # values that JSON cannot represent (bytes, complex, Ellipsis) are
        # deliberately written as their Python repr string.
        print(json.dumps(json_tree, default=repr))

0 commit comments

Comments
 (0)