Java class file parser written in RPython (PyPy)
1 import os
2
class Root:
    """Top-level record of a parsed class file.

    Instances start empty; ``JavaClassFile.parse`` assigns ``magic``,
    ``minor_version``, ``major_version``, ``constant_pool``, ``access_flags``,
    ``this_class``, ``super_class``, ``interfaces``, ``fields``, ``methods``
    and ``attributes``.
    """

    def __init__(self):
        """Create an empty record; the parser assigns the fields afterwards."""

    def to_string(self):
        """Return the scalar header fields, one ``name: value`` pair per line."""
        template = (
            'magic: %d\n'
            'minor_version: %d\n'
            'major_version: %d\n'
            'access_flags: %d\n'
            'this_class: %d\n'
            'super_class: %d\n'
        )
        values = (
            self.magic,
            self.minor_version,
            self.major_version,
            self.access_flags,
            self.this_class,
            self.super_class,
        )
        return template % values
14
class Cp_info:
    """One constant-pool entry.

    A bare attribute holder: ``JavaClassFile.cp_info`` assigns ``tag`` plus
    the fields that belong to that tag (for example ``name_index`` or
    ``utf8``).
    """

    def __init__(self):
        """Create an empty entry; the parser assigns the fields afterwards."""
18
class Field_method_info:
    """One entry of the fields table or of the methods table.

    A bare attribute holder: ``JavaClassFile.field_method_info`` assigns
    ``access_flags``, ``name_index``, ``descriptor_index`` and ``attributes``.
    """

    def __init__(self):
        """Create an empty entry; the parser assigns the fields afterwards."""
22
class Attribute_info:
    """One attribute attached to a class, field, method or Code attribute.

    A bare attribute holder: ``JavaClassFile.attribute_info`` assigns
    ``attribute_name_index`` plus whatever fields the named attribute
    carries.
    """

    def __init__(self):
        """Create an empty attribute; the parser assigns the fields afterwards."""
26
class Exception_table:
    """One row of a Code attribute's exception table.

    A bare attribute holder: ``JavaClassFile.exception_table`` assigns
    ``start_pc``, ``end_pc``, ``handler_pc`` and ``catch_type``.
    """

    def __init__(self):
        """Create an empty row; the parser assigns the fields afterwards."""
30
class Classes:
    """One row of an InnerClasses attribute.

    A bare attribute holder: ``JavaClassFile.classes`` assigns
    ``inner_class_info_index``, ``outer_class_info_index``,
    ``inner_name_index`` and ``inner_class_access_flags``.
    """

    def __init__(self):
        """Create an empty row; the parser assigns the fields afterwards."""
34
class Line_number_table:
    """One row of a LineNumberTable attribute.

    A bare attribute holder: ``JavaClassFile.line_number_table`` assigns
    ``start_pc`` and ``line_number``.
    """

    def __init__(self):
        """Create an empty row; the parser assigns the fields afterwards."""
38
class Local_variable_table:
    """One row of a LocalVariableTable attribute.

    A bare attribute holder: ``JavaClassFile.local_variable_table`` assigns
    ``start_pc``, ``length``, ``name_index``, ``descriptor_index`` and
    ``index``.
    """

    def __init__(self):
        """Create an empty row; the parser assigns the fields afterwards."""
42
class Local_variable_type_table:
    """One row of a LocalVariableTypeTable attribute.

    A bare attribute holder: ``JavaClassFile.local_variable_type_table``
    assigns ``start_pc``, ``length``, ``name_index``, ``signature_index`` and
    ``index``.
    """

    def __init__(self):
        """Create an empty row; the parser assigns the fields afterwards."""
46
class Annotation:
    """One annotation structure.

    A bare attribute holder: ``JavaClassFile.annotation`` assigns
    ``type_index`` and ``element_value_pairs``.
    """

    def __init__(self):
        """Create an empty annotation; the parser assigns the fields afterwards."""
50
class Element_value_pairs:
    """One name/value pair inside an annotation.

    A bare attribute holder: ``JavaClassFile.element_value_pairs`` assigns
    ``element_name_index`` and ``value``.
    """

    def __init__(self):
        """Create an empty pair; the parser assigns the fields afterwards."""
54
class Element_value:
    """One annotation element value.

    A bare attribute holder: ``JavaClassFile.element_value`` assigns the
    field(s) selected by the value's one-character tag (for example
    ``const_value_index`` or ``array_value``).
    """

    def __init__(self):
        """Create an empty value; the parser assigns the fields afterwards."""
58
59
class JavaClassFile:
    """Reader for the JVM ``.class`` file format.

    The whole file is loaded into ``self.buffer`` and every read method
    consumes bytes from the front of that buffer.  ``parse()`` walks the file
    from top to bottom and returns a populated ``Root``.

    Written for Python 2 / RPython: the buffer is a ``str`` and single bytes
    are converted with ``ord``.  Malformed or truncated input is reported
    with ``IndexError``.
    """

    def __init__(self, filename):
        """Load the complete contents of *filename* into ``self.buffer``."""
        fd = os.open(filename, os.O_RDONLY, 0o777)
        chunks = []
        try:
            while True:
                chunk = os.read(fd, 4096)
                if len(chunk) == 0:
                    break
                chunks.append(chunk)
        finally:
            # Close the descriptor even when a read fails.
            os.close(fd)
        self.buffer = ''.join(chunks)

    # Constant-pool tag -> structure name.
    CONSTANT_POOL_TYPE = {
        7: 'CONSTANT_Class',
        9: 'CONSTANT_Fieldref',
        10: 'CONSTANT_Methodref',
        11: 'CONSTANT_InterfaceMethodref',
        8: 'CONSTANT_String',
        3: 'CONSTANT_Integer',
        4: 'CONSTANT_Float',
        5: 'CONSTANT_Long',
        6: 'CONSTANT_Double',
        12: 'CONSTANT_NameAndType',
        1: 'CONSTANT_Utf8',
        15: 'CONSTANT_MethodHandle',
        16: 'CONSTANT_MethodType',
        17: 'CONSTANT_Dynamic',
        18: 'CONSTANT_InvokeDynamic',
        19: 'CONSTANT_Module',
        20: 'CONSTANT_Package'}

    # Field-descriptor leading character -> type description.
    BASE_TYPE = {
        'B': 'byte',
        'C': 'char',
        'D': 'double',
        'F': 'float',
        'I': 'int',
        'J': 'long',
        'L': 'Classname; reference',
        'S': 'short',
        'Z': 'boolean',
        '[': 'reference'}

    # Annotation element_value tag -> description.
    ELEMENT_TYPE = {
        's': 'String',
        'e': 'enum constant',
        'c': 'class',
        '@': 'annotation type',
        '[': 'array'}
    # NOTE: BASE_TYPE also has a '[' key, so after this update
    # ELEMENT_TYPE['['] is 'reference', not 'array'.
    ELEMENT_TYPE.update(BASE_TYPE)

    # Class-level access_flags bit -> flag name.
    CLASS_ACCESS_AND_PROPERTY_MODIFIER = {
        0x0001: 'ACC_PUBLIC',
        0x0010: 'ACC_FINAL',
        0x0020: 'ACC_SUPER',
        0x0200: 'ACC_INTERFACE',
        0x0400: 'ACC_ABSTRACT',
        0x1000: 'ACC_SYNTHETIC',
        0x2000: 'ACC_ANNOTATION',
        0x4000: 'ACC_ENUM'}

    # ------------------------------------------------------------------
    # Primitive readers
    # ------------------------------------------------------------------

    def c(self):
        """Consume and return the next byte as a one-character string.

        Raises:
            IndexError: the buffer is exhausted.
        """
        if len(self.buffer) == 0:
            raise IndexError('unexpected end of class file')
        ret = self.buffer[0]
        self.buffer = self.buffer[1:]
        return ret

    def u1(self):
        """Consume one byte and return it as an integer (0..255)."""
        ret = ord(self.c())
        # ord() is never negative; the check is presumably there so RPython
        # can prove the value non-negative -- TODO confirm.
        if ret < 0:
            raise IndexError
        return ret

    def u2(self):
        """Consume two bytes and return them as a big-endian integer."""
        a = ord(self.c())
        b = ord(self.c())
        ret = (a << 8) + b
        # See u1() for why this check is kept.
        if ret < 0:
            raise IndexError
        return ret

    def i4(self):
        """Consume four bytes and combine them big-endian.

        The result can only come out negative where the integer type wraps
        at 32 bits; u4() rejects that case.
        """
        a = ord(self.c())
        b = ord(self.c())
        c = ord(self.c())
        d = ord(self.c())
        return (a << 24) + (b << 16) + (c << 8) + d

    def u4(self):
        """Consume four bytes as a big-endian integer, rejecting negatives.

        Raises:
            NotImplementedError: the combined value is negative.
        """
        ret = self.i4()
        if ret < 0:
            raise NotImplementedError('code is too big')
        return ret

    # ------------------------------------------------------------------
    # Top-level structure
    # ------------------------------------------------------------------

    def parse(self):
        """Parse the buffered class file and return the populated ``Root``.

        The result is also kept in ``self.root``.  The constant pool is read
        before anything that carries attributes, because attribute parsing
        looks attribute names up in it.
        """
        self.root = Root()
        r = self.root
        r.magic = self.i4()
        r.minor_version = self.u2()
        r.major_version = self.u2()
        r.constant_pool = self.cp_info_list()
        r.access_flags = self.u2()
        r.this_class = self.u2()
        r.super_class = self.u2()
        r.interfaces = self.u2_list()
        r.fields = self.field_method_info_list()
        r.methods = self.field_method_info_list()
        r.attributes = self.attribute_info_list()
        return self.root

    def range_u2(self):
        """Read a u2 count and return ``range(count)``."""
        return range(self.u2())

    def u2_list(self):
        """Read a u2 count followed by that many u2 values."""
        ret = []
        for i in self.range_u2():
            ret.append(self.u2())
        return ret

    def exception_table(self):
        """Read one exception-table row (four u2 fields)."""
        et = Exception_table()
        et.start_pc = self.u2()
        et.end_pc = self.u2()
        et.handler_pc = self.u2()
        et.catch_type = self.u2()
        return et

    # ------------------------------------------------------------------
    # Constant pool
    # ------------------------------------------------------------------

    def cp_info_list(self):
        """Read the constant pool.

        Returns a list in which the entry with constant-pool index ``i`` is
        stored at position ``i - 1``.  CONSTANT_Long and CONSTANT_Double
        occupy two pool indices but only one structure in the file, so a
        placeholder ``Cp_info`` with ``tag == 0`` fills the second index and
        keeps later positions aligned.
        """
        count = self.u2()
        ret = []
        index = 1
        while index < count:
            cp = self.cp_info()
            ret.append(cp)
            index += 1
            if cp.tag == 5 or cp.tag == 6:
                # The index after an 8-byte constant is unusable and has no
                # bytes in the file.
                if index < count:
                    placeholder = Cp_info()
                    placeholder.tag = 0
                    ret.append(placeholder)
                index += 1
        return ret

    def cp_info(self):
        """Read one constant-pool structure.

        Raises:
            IndexError: the tag byte is not a known constant type.
        """
        tag = self.u1()
        cp = Cp_info()
        cp.tag = tag
        if tag not in JavaClassFile.CONSTANT_POOL_TYPE:
            raise IndexError('bad cp_info tag value')
        t = JavaClassFile.CONSTANT_POOL_TYPE[tag]

        if t == 'CONSTANT_Class':
            cp.name_index = self.u2()
        elif (t == 'CONSTANT_Fieldref' or t == 'CONSTANT_Methodref' or
              t == 'CONSTANT_InterfaceMethodref'):
            cp.class_index = self.u2()
            cp.name_and_type_index = self.u2()
        elif t == 'CONSTANT_String':
            cp.string_index = self.u2()
        elif t == 'CONSTANT_Integer' or t == 'CONSTANT_Float':
            cp.bytes = self.u4()
        elif t == 'CONSTANT_Long' or t == 'CONSTANT_Double':
            cp.high_bytes = self.u4()
            cp.low_bytes = self.u4()
        elif t == 'CONSTANT_NameAndType':
            cp.name_index = self.u2()
            cp.descriptor_index = self.u2()
        elif t == 'CONSTANT_Utf8':
            cp.utf8 = self.utf8_list()
        elif t == 'CONSTANT_MethodHandle':
            cp.reference_kind = self.u1()
            cp.reference_index = self.u2()
        elif t == 'CONSTANT_MethodType':
            cp.descriptor_index = self.u2()
        elif t == 'CONSTANT_Dynamic' or t == 'CONSTANT_InvokeDynamic':
            cp.bootstrap_method_attr_index = self.u2()
            cp.name_and_type_index = self.u2()
        elif t == 'CONSTANT_Module' or t == 'CONSTANT_Package':
            cp.name_index = self.u2()
        else:
            # Only reachable if a tag is added to the table without a branch.
            raise Exception('tag of cp_info is bad.')
        return cp

    def utf8_list(self):
        """Read a u2 length followed by that many raw bytes.

        Raises:
            IndexError: fewer bytes remain than the length announces.
        """
        length = self.u2()
        if length > len(self.buffer):
            raise IndexError('unexpected end of class file')
        ret = self.buffer[0:length]
        self.buffer = self.buffer[length:]
        return ret

    # ------------------------------------------------------------------
    # Fields, methods and attributes
    # ------------------------------------------------------------------

    def field_method_info_list(self):
        """Read a u2 count followed by that many field/method structures."""
        ret = []
        for i in self.range_u2():
            ret.append(self.field_method_info())
        return ret

    def field_method_info(self):
        """Read one field or method structure, including its attributes."""
        fm = Field_method_info()
        fm.access_flags = self.u2()
        fm.name_index = self.u2()
        fm.descriptor_index = self.u2()
        fm.attributes = self.attribute_info_list()
        return fm

    def attribute_info_list(self):
        """Read a u2 count followed by that many attributes."""
        ret = []
        for i in self.range_u2():
            ret.append(self.attribute_info())
        return ret

    def attribute_info(self):
        """Read one attribute and decode it according to its name.

        While the attribute body is decoded, ``self.buffer`` is narrowed to
        exactly ``attribute_length`` bytes; afterwards it is set to the bytes
        that follow the attribute, so partially understood attributes cannot
        desynchronise the parser.

        Attributes whose name cannot be resolved, and attributes with an
        unrecognised name, keep their raw bytes in ``info``.

        Raises:
            IndexError: fewer bytes remain than ``attribute_length``.
        """
        at = Attribute_info()
        at.attribute_name_index = self.u2()
        attribute_length = self.u4()
        if attribute_length > len(self.buffer):
            raise IndexError('unexpected end of class file')
        next_buffer = self.buffer[attribute_length:]
        self.buffer = self.buffer[0:attribute_length]

        # Resolve the name with explicit checks: index 0 would otherwise wrap
        # around to the last pool entry, and only Utf8 entries (tag 1) carry
        # a ``utf8`` field.
        pool = self.root.constant_pool
        pool_index = at.attribute_name_index - 1
        if (pool_index < 0 or pool_index >= len(pool) or
                pool[pool_index].tag != 1):
            print('attribute_name_index error')
            print(at.attribute_name_index)
            print('')
            at.info = self.buffer
            self.buffer = next_buffer
            return at
        an = pool[pool_index].utf8

        if an == 'ConstantValue':
            at.constantvalue_index = self.u2()
        elif an == 'Code':
            at.max_stack = self.u2()
            at.max_locals = self.u2()
            at.code = self.code_list()
            at.exception_table = self.exception_table_list()
            at.attributes = self.attribute_info_list()
        elif an == 'Exceptions':
            at.exception_index_table = self.u2_list()
        elif an == 'InnerClasses':
            at.classes = self.classes_list()
        elif an == 'EnclosingMethod':
            at.class_index = self.u2()
            at.method_index = self.u2()
        elif an == 'Synthetic':
            pass
        elif an == 'Signature':
            at.signature_index = self.u2()
        elif an == 'SourceFile':
            at.sourcefile_index = self.u2()
        elif an == 'SourceDebugExtension':
            at.debug_extension = self.buffer
        elif an == 'LineNumberTable':
            at.line_number_table = self.line_number_table_list()
        elif an == 'LocalVariableTable':
            at.local_variable_table = self.local_variable_table_list()
        elif an == 'LocalVariableTypeTable':
            at.local_variable_type_table = self.local_variable_type_table_list()
        elif an == 'Deprecated':
            pass
        elif (an == 'RuntimeVisibleAnnotations' or
              an == 'RuntimeInvisibleAnnotations'):
            at.annotations = self.annotation_list()
        elif (an == 'RuntimeVisibleParameterAnnotations' or
              an == 'RuntimeInvisibleParameterAnnotations'):
            # One annotation list per formal parameter.
            at.parameter_annotations = self.parameter_annotation_list()
        elif an == 'AnnotationDefault':
            at.default_value = self.element_value()
        elif an == 'StackMapTable':
            print('warning: StackMapTable not supported')
        else:
            print('unknown attribute')
            at.info = self.buffer

        self.buffer = next_buffer
        return at

    def code_list(self):
        """Read a u4 length followed by that many bytecode bytes."""
        ret = []
        for i in range(self.u4()):
            ret.append(self.code_array())
        return ret

    def code_array(self):
        """Read one bytecode byte."""
        return self.u1()

    def exception_table_list(self):
        """Read a u2 count followed by that many exception-table rows."""
        ret = []
        for i in self.range_u2():
            ret.append(self.exception_table())
        return ret

    def classes_list(self):
        """Read a u2 count followed by that many InnerClasses rows."""
        ret = []
        for i in self.range_u2():
            ret.append(self.classes())
        return ret

    def classes(self):
        """Read one InnerClasses row (four u2 fields)."""
        c = Classes()
        c.inner_class_info_index = self.u2()
        c.outer_class_info_index = self.u2()
        c.inner_name_index = self.u2()
        c.inner_class_access_flags = self.u2()
        return c

    def line_number_table_list(self):
        """Read a u2 count followed by that many line-number rows."""
        ret = []
        for i in self.range_u2():
            ret.append(self.line_number_table())
        return ret

    def line_number_table(self):
        """Read one line-number row (``start_pc``, ``line_number``)."""
        ln = Line_number_table()
        ln.start_pc = self.u2()
        ln.line_number = self.u2()
        return ln

    def local_variable_table_list(self):
        """Read a u2 count followed by that many local-variable rows."""
        ret = []
        for i in self.range_u2():
            ret.append(self.local_variable_table())
        return ret

    def local_variable_table(self):
        """Read one LocalVariableTable row (five u2 fields)."""
        lv = Local_variable_table()
        lv.start_pc = self.u2()
        lv.length = self.u2()
        lv.name_index = self.u2()
        lv.descriptor_index = self.u2()
        lv.index = self.u2()
        return lv

    def local_variable_type_table_list(self):
        """Read a u2 count followed by that many local-variable-type rows."""
        ret = []
        for i in self.range_u2():
            ret.append(self.local_variable_type_table())
        return ret

    def local_variable_type_table(self):
        """Read one LocalVariableTypeTable row (five u2 fields)."""
        lv = Local_variable_type_table()
        lv.start_pc = self.u2()
        lv.length = self.u2()
        lv.name_index = self.u2()
        lv.signature_index = self.u2()
        lv.index = self.u2()
        return lv

    # ------------------------------------------------------------------
    # Annotations
    # ------------------------------------------------------------------

    def annotation_list(self):
        """Read a u2 count followed by that many annotations."""
        ret = []
        for i in self.range_u2():
            ret.append(self.annotation())
        return ret

    def parameter_annotation_list(self):
        """Read a u1 parameter count, then one annotation list per parameter.

        Returns a list of lists of ``Annotation``.
        """
        ret = []
        for i in range(self.u1()):
            ret.append(self.annotation_list())
        return ret

    def annotation(self):
        """Read one annotation: a type index and its element-value pairs."""
        at = Annotation()
        at.type_index = self.u2()
        at.element_value_pairs = self.element_value_pairs_list()
        return at

    def element_value_pairs_list(self):
        """Read a u2 count followed by that many element-value pairs."""
        ret = []
        for i in self.range_u2():
            ret.append(self.element_value_pairs())
        return ret

    def element_value_pairs(self):
        """Read one element-value pair (name index, then value)."""
        ev = Element_value_pairs()
        ev.element_name_index = self.u2()
        ev.value = self.element_value()
        return ev

    def element_value_list(self):
        """Read a u2 count followed by that many element values."""
        ret = []
        for i in self.range_u2():
            ret.append(self.element_value())
        return ret

    def element_value(self):
        """Read one element value, dispatching on its one-character tag.

        Raises:
            Exception: the tag is not a known element-value tag.
        """
        ev = Element_value()
        et = self.c()
        const_value_index_use = ['B', 'C', 'D', 'F', 'I', 'J', 'S', 'Z', 's']
        if et in const_value_index_use:
            ev.const_value_index = self.u2()
        elif et == 'e':
            ev.type_name_index = self.u2()
            ev.const_name_index = self.u2()
        elif et == 'c':
            ev.class_info_index = self.u2()
        elif et == '@':
            ev.annotation_value = self.annotation()
        elif et == '[':
            ev.array_value = self.element_value_list()
        else:
            print(et)
            raise Exception('tag of element_value is bad.')
        return ev
429
def entry_point(argv):
    """Parse the class file named by ``argv[1]`` and print its header fields.

    Returns 1 when no file name was given, 0 after a successful parse.
    """
    # Guard clause: without a second element there is nothing to parse.
    if len(argv) < 2:
        print("No source file")
        return 1

    parser = JavaClassFile(argv[1])
    parser.parse()
    print(parser.root.to_string())
    return 0
441
def target(*args):
    """Return the pair ``(entry_point, None)``; all arguments are ignored.

    Presumably the hook the RPython translation toolchain calls to find the
    program's entry function -- confirm against the RPython docs.
    """
    return (entry_point, None)
444
if __name__ == "__main__":
    import sys
    # Hand entry_point's status (1 = no source file, 0 = success) to the
    # shell instead of discarding it.
    sys.exit(entry_point(sys.argv))
Discussion
RPython is hell: no map(); dictionaries and lists are restricted to a single element type; no generic programming (look at all the duplicated code in the foo_list() methods...); no type declarations.
Meta-tracing JIT may be the way to go, but RPython obviously is not.
Comments
Sign in to leave a comment.

