|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +import sys |
| 4 | +import json as json |
| 5 | +import ast |
| 6 | +import asttokens |
| 7 | +from xml.sax.saxutils import quoteattr |
| 8 | +import argparse |
| 9 | + |
def read_file_to_string(filename):
    """Return the entire contents of *filename* as a string.

    The file is opened in text mode ('rt'); no encoding is passed, so the
    platform default encoding is used for decoding.

    Raises:
        OSError: if the file cannot be opened or read.
        UnicodeDecodeError: if the contents cannot be decoded.
    """
    # The context manager closes the handle even when read() raises.
    with open(filename, 'rt') as f:
        return f.read()
| 15 | + |
| 16 | + |
def parse_file(filename):
    """Parse a Python source file into a flat list of JSON-like node dicts.

    The file is parsed with asttokens and its AST is walked depth-first.
    Every visited node becomes a dict appended to one flat list; a node
    refers to its children through their integer positions in that list
    (key 'children', omitted when there are none).  The root is entry 0.

    Each dict has:
      * 'type'  -- the AST class name (with the names of any expr_context /
        operator children appended), or a synthetic label such as 'body',
        'orelse', 'args', 'attr' or 'identifier';
      * 'lineno', 'col', 'end_line_no', 'end_col' -- source positions as
        strings, taken from the node's first_token / last_token;
      * 'value' -- optional; the identifier or constant carried by the node.

    Args:
        filename: path of the Python source file to parse.

    Returns:
        The flat list of node dicts.
    """
    tree = asttokens.ASTTokens(read_file_to_string(filename), parse=True).tree

    json_tree = []

    # Pre-3.8 constant node classes, matched by class name so the deprecated
    # ast.Num / ast.Str / ast.NameConstant attributes (absent from newer
    # Python releases) are never looked up.  Maps class name -> value attr.
    legacy_value_attrs = {'Num': 'n', 'Str': 's', 'NameConstant': 'value'}

    # Child node kinds that are folded into the parent's type name instead
    # of becoming nodes of their own.
    folded_kinds = (ast.expr_context, ast.operator, ast.boolop,
                    ast.unaryop, ast.cmpop)

    def localize(node, json_node):
        """Copy the token span of *node* into *json_node* as strings."""
        json_node['lineno'] = str(node.first_token.start[0])
        json_node['col'] = str(node.first_token.start[1])
        json_node['end_line_no'] = str(node.last_token.end[0])
        json_node['end_col'] = str(node.last_token.end[1])

    def new_node(node_type):
        """Append an empty node of *node_type*; return (position, dict)."""
        pos = len(json_tree)
        json_node = {'type': node_type}
        json_tree.append(json_node)
        return pos, json_node

    def gen_identifier(identifier, node_type='identifier', node=None):
        """Emit a leaf holding *identifier*, located at *node*'s span."""
        pos, json_node = new_node(node_type)
        json_node['value'] = identifier
        localize(node, json_node)
        return pos

    def traverse_list(l, node_type='list', node=None):
        """Emit a synthetic *node_type* node grouping the AST nodes in *l*.

        The group has no tokens of its own, so it borrows *node*'s span.
        """
        pos, json_node = new_node(node_type)
        localize(node, json_node)
        children = [traverse(item) for item in l]
        if children:
            json_node['children'] = children
        return pos

    def traverse(node):
        """Emit *node* and, recursively, its descendants; return its position."""
        type_name = type(node).__name__
        pos, json_node = new_node(type_name)
        localize(node, json_node)
        children = []

        # Attach the node's own identifier / constant value, if any.
        if isinstance(node, ast.Name):
            json_node['value'] = node.id
        elif isinstance(node, ast.Constant):
            json_node['value'] = node.value
        elif type_name in legacy_value_attrs:
            json_node['value'] = getattr(node, legacy_value_attrs[type_name])
        elif isinstance(node, ast.alias):
            json_node['value'] = node.name
            if node.asname:
                children.append(gen_identifier(node.asname, node=node))
        elif isinstance(node, (ast.FunctionDef, ast.ClassDef)):
            json_node['value'] = node.name
        elif isinstance(node, ast.ExceptHandler):
            if node.name:
                json_node['value'] = node.name
        elif isinstance(node, ast.ImportFrom):
            if node.module:
                json_node['value'] = node.module
        elif isinstance(node, ast.Global):
            for n in node.names:
                children.append(gen_identifier(n, node=node))
        elif isinstance(node, (ast.keyword, ast.arg)):
            json_node['value'] = node.arg

        # Process children.
        if isinstance(node, ast.For):
            children.append(traverse(node.target))
            children.append(traverse(node.iter))
            children.append(traverse_list(node.body, 'body', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
        elif isinstance(node, (ast.If, ast.While)):
            children.append(traverse(node.test))
            children.append(traverse_list(node.body, 'body', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
        elif isinstance(node, ast.With):
            children.append(traverse_list(node.items, 'items', node))
            children.append(traverse_list(node.body, 'body', node))
        elif isinstance(node, ast.withitem):
            children.append(traverse(node.context_expr))
            if node.optional_vars:
                children.append(traverse(node.optional_vars))
        elif isinstance(node, ast.Try):
            children.append(traverse_list(node.body, 'body', node))
            children.append(traverse_list(node.handlers, 'handlers', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
            if node.finalbody:
                children.append(
                    traverse_list(node.finalbody, 'finalbody', node))
        elif isinstance(node, ast.arguments):
            children.append(traverse_list(node.args, 'args', node))
            children.append(traverse_list(node.defaults, 'defaults', node))
            # NOTE(review): the two keyword-only lists are also labelled
            # 'defaults'.  This looks like a copy-paste slip, but the labels
            # are kept so the emitted schema does not change -- confirm with
            # consumers before renaming.
            children.append(traverse_list(node.kwonlyargs, 'defaults', node))
            # kw_defaults holds None for keyword-only arguments that have no
            # default; None has no tokens and cannot be traversed.
            kw_defaults = [d for d in node.kw_defaults if d is not None]
            children.append(traverse_list(kw_defaults, 'defaults', node))
            if node.vararg:
                children.append(
                    gen_identifier(node.vararg.arg, 'vararg', node.vararg))
            if node.kwarg:
                children.append(
                    gen_identifier(node.kwarg.arg, 'kwarg', node.kwarg))
        elif isinstance(node, ast.ExceptHandler):
            if node.type:
                children.append(traverse_list([node.type], 'type', node))
            children.append(traverse_list(node.body, 'body', node))
        elif isinstance(node, ast.ClassDef):
            children.append(traverse_list(node.bases, 'bases', node))
            children.append(traverse_list(node.body, 'body', node))
            children.append(
                traverse_list(node.decorator_list, 'decorator_list', node))
        elif isinstance(node, ast.FunctionDef):
            children.append(traverse(node.args))
            children.append(traverse_list(node.body, 'body', node))
            children.append(
                traverse_list(node.decorator_list, 'decorator_list', node))
        else:
            # Default handling: iterate over children.
            for child in ast.iter_child_nodes(node):
                if isinstance(child, folded_kinds):
                    # Directly include expr_context and operators in the
                    # type instead of creating a child.
                    json_node['type'] += type(child).__name__
                else:
                    children.append(traverse(child))

        # The attribute name is a plain string, not an AST child, so it is
        # added explicitly after the generic child walk.
        if isinstance(node, ast.Attribute):
            children.append(gen_identifier(node.attr, 'attr', node))

        if children:
            json_node['children'] = children
        return pos

    traverse(tree)
    return json_tree
| 155 | + |
| 156 | + |
def json2xml(tree):
    """Render a flat node list as an indented XML-like string.

    Args:
        tree: list of node dicts.  Entry 0 is the root; a node's optional
            'children' entry lists the positions of its children in *tree*.
            Each node needs a 'type' (used as the element name) and may have
            'value', 'lineno', 'col', 'end_line_no' and 'end_col', which are
            emitted as attributes in that order.

    Returns:
        One opening and one closing tag per node, each on its own line and
        indented with one tab per nesting level, joined by newlines.
        An empty *tree* yields an empty string.
    """
    if not tree:
        return ""

    attribute_keys = ('value', 'lineno', 'col', 'end_line_no', 'end_col')
    lines = []

    # An explicit stack replaces recursion so deeply nested trees cannot
    # exceed the interpreter's recursion limit.  Each entry is
    # (node position, nesting depth, whether to emit the closing tag).
    stack = [(0, 0, False)]
    while stack:
        index, depth, closing = stack.pop()
        node = tree[index]
        indent = "\t" * depth

        if closing:
            lines.append("{}</{}>".format(indent, node['type']))
            continue

        # quoteattr escapes the value and wraps it in quotes.
        attributes = "".join(
            ' {}={}'.format(key, quoteattr(str(node[key])))
            for key in attribute_keys
            if key in node
        )
        lines.append("{}<{}{}>".format(indent, node['type'], attributes))

        # Schedule the closing tag first so it is popped after every child;
        # push children reversed so they are popped in document order.
        stack.append((index, depth, True))
        for child in reversed(node.get('children', ())):
            stack.append((int(child), depth + 1, False))

    return "\n".join(lines)
| 174 | + |
| 175 | + |
def parse(filename):
    """Return the XML rendering of the Python file *filename*.

    Best-effort: if a UnicodeEncodeError or UnicodeDecodeError is raised
    while the file is parsed or rendered, it is swallowed and None is
    returned instead.
    """
    xml_text = None
    try:
        xml_text = json2xml(parse_file(filename))
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Deliberately ignored; the caller receives None.
        pass
    return xml_text
| 183 | + |
| 184 | + |
if __name__ == "__main__":
    # Command-line entry point: parse one Python file and print its XML
    # rendering (unless suppressed) and, on request, the node list as JSON.
    parser = argparse.ArgumentParser(description='Parse python3 file')
    parser.add_argument('filename', type=str, help='Filename')
    parser.add_argument('--noprintxml', action="store_true",
                        help='Do not print the XML output')
    parser.add_argument('--printjson', action="store_true", help='Show json')

    args = parser.parse_args()
    json_tree = parse_file(args.filename)

    if not args.noprintxml:
        # Only render the XML when it is actually going to be printed.
        print(json2xml(json_tree))
    if args.printjson:
        # Emit real JSON rather than the Python repr of the list.
        # default=str is deliberate: constant values such as bytes, complex
        # numbers or Ellipsis are not JSON-serializable on their own.
        print(json.dumps(json_tree, default=str))
0 commit comments