Profile image for hajime nakazato hajimen
Java class file parser written in RPython(PyPy)
Language
Python 3
Tags
java pypy rpython

Java class file parser written in RPython(PyPy)

"""Java class file parser written in the RPython subset of Python.

The code sticks to constructs accepted by both Python 2.7 (the host language
of RPython) and Python 3: single-argument ``print(...)``, call-style ``raise``
and ``0o`` octal literals.  All data read from the class file is kept as byte
strings (``str`` on Python 2, ``bytes`` on Python 3).

NOTE(review): the RPython translation itself has not been re-checked after
this rewrite -- confirm with the translator before relying on it.
"""
import os


class Root:
    """Top-level ClassFile record; its fields are assigned by JavaClassFile.parse()."""

    def __init__(self):
        pass

    def to_string(self):
        """Return a multi-line, human-readable summary of the scalar header fields."""
        template = ('magic: %d\n'
                    'minor_version: %d\n'
                    'major_version: %d\n'
                    'access_flags: %d\n'
                    'this_class: %d\n'
                    'super_class: %d\n')
        return template % (self.magic, self.minor_version, self.major_version,
                           self.access_flags, self.this_class, self.super_class)


# The classes below are plain records.  One class per structure is used instead
# of a generic container; their fields are assigned by the parser methods.

class Cp_info:
    """One constant pool entry (always has ``tag``; other fields depend on it)."""

    def __init__(self):
        pass


class Field_method_info:
    """One field_info or method_info entry (both share the same layout)."""

    def __init__(self):
        pass


class Attribute_info:
    """One attribute; the fields present depend on the attribute's name."""

    def __init__(self):
        pass


class Exception_table:
    """One exception handler entry of a Code attribute."""

    def __init__(self):
        pass


class Classes:
    """One entry of an InnerClasses attribute."""

    def __init__(self):
        pass


class Line_number_table:
    """One entry of a LineNumberTable attribute."""

    def __init__(self):
        pass


class Local_variable_table:
    """One entry of a LocalVariableTable attribute."""

    def __init__(self):
        pass


class Local_variable_type_table:
    """One entry of a LocalVariableTypeTable attribute."""

    def __init__(self):
        pass


class Annotation:
    """One annotation: a type index plus its element/value pairs."""

    def __init__(self):
        pass


class Element_value_pairs:
    """One (element_name_index, value) pair of an annotation."""

    def __init__(self):
        pass


class Element_value:
    """One element_value; the fields present depend on its tag character."""

    def __init__(self):
        pass


class JavaClassFile:
    """Sequential reader/parser for the bytes of a Java ``.class`` file.

    State:
      buffer -- the complete file contents (byte string)
      pos    -- offset of the next unread byte
      end    -- offset one past the last byte readable right now; narrowed
                while an attribute body is parsed so that a parser can
                never read beyond the attribute's declared length
      root   -- the Root record, available once parse() has started
    """

    def __init__(self, filename):
        """Read the whole of ``filename`` into memory.

        Raises OSError if the file cannot be opened or read.
        """
        fd = os.open(filename, os.O_RDONLY, 0o777)
        chunks = []
        try:
            while True:
                chunk = os.read(fd, 4096)
                if len(chunk) == 0:
                    break
                chunks.append(chunk)
        finally:
            # Close the descriptor even when a read fails.
            os.close(fd)
        self.buffer = b''.join(chunks)
        self.pos = 0
        self.end = len(self.buffer)

    CONSTANT_POOL_TYPE = {
        7: 'CONSTANT_Class',
        9: 'CONSTANT_Fieldref',
        10: 'CONSTANT_Methodref',
        11: 'CONSTANT_InterfaceMethodref',
        8: 'CONSTANT_String',
        3: 'CONSTANT_Integer',
        4: 'CONSTANT_Float',
        5: 'CONSTANT_Long',
        6: 'CONSTANT_Double',
        12: 'CONSTANT_NameAndType',
        1: 'CONSTANT_Utf8',
        15: 'CONSTANT_MethodHandle',
        16: 'CONSTANT_MethodType',
        17: 'CONSTANT_Dynamic',
        18: 'CONSTANT_InvokeDynamic',
        19: 'CONSTANT_Module',
        20: 'CONSTANT_Package'}

    BASE_TYPE = {
        'B': 'byte',
        'C': 'char',
        'D': 'double',
        'F': 'float',
        'I': 'int',
        'J': 'long',
        'L': 'Classname; reference',
        'S': 'short',
        'Z': 'boolean',
        '[': 'reference'}

    # Start from the base types and then add the element-specific tags, so
    # that '[' means 'array' here instead of being overwritten by BASE_TYPE.
    ELEMENT_TYPE = dict(BASE_TYPE)
    ELEMENT_TYPE.update({
        's': 'String',
        'e': 'enum constant',
        'c': 'class',
        '@': 'annotation type',
        '[': 'array'})

    CLASS_ACCESS_AND_PROPERTY_MODIFIER = {
        0x0001: 'ACC_PUBLIC',
        0x0010: 'ACC_FINAL',
        0x0020: 'ACC_SUPER',
        0x0200: 'ACC_INTERFACE',
        0x0400: 'ACC_ABSTRACT',
        0x1000: 'ACC_SYNTHETIC',
        0x2000: 'ACC_ANNOTATION',
        0x4000: 'ACC_ENUM'}

    # ------------------------------------------------------------------
    # primitive readers
    # ------------------------------------------------------------------

    def c(self):
        """Consume one byte and return it as a length-1 byte string.

        Raises ValueError when no byte is left in the current window.
        """
        start = self.pos
        if start < 0 or start >= self.end:
            raise ValueError('unexpected end of data')
        self.pos = start + 1
        return self.buffer[start:start + 1]

    def _remaining(self):
        """Return the unread bytes of the current window without consuming them."""
        start = self.pos
        stop = self.end
        if start < 0 or stop < start:
            raise IndexError
        return self.buffer[start:stop]

    def u1(self):
        """Consume one byte and return it as an int (0..255)."""
        ret = ord(self.c())
        # ord() is never negative; the guard is presumably a hint that lets
        # the RPython annotator treat the value as non-negative -- kept as is.
        if ret < 0:
            raise IndexError
        return ret

    def u2(self):
        """Consume two bytes and return them as a big-endian int."""
        a = ord(self.c())
        b = ord(self.c())
        ret = (a << 8) + b
        if ret < 0:  # see u1()
            raise IndexError
        return ret

    def i4(self):
        """Consume four bytes and combine them big-endian (no sign conversion)."""
        a = ord(self.c())
        b = ord(self.c())
        c = ord(self.c())
        d = ord(self.c())
        return (a << 24) + (b << 16) + (c << 8) + d

    def u4(self):
        """Like i4(), but raise NotImplementedError if the result is negative.

        The result can only be negative where the shifts in i4() overflow a
        machine word; with unbounded Python ints the guard never triggers.
        """
        ret = self.i4()
        if ret < 0:
            raise NotImplementedError('code is too big')
        return ret

    # ------------------------------------------------------------------
    # top level
    # ------------------------------------------------------------------

    def parse(self):
        """Parse ``self.buffer`` from its first byte and return the Root record.

        The cursor is reset first, so parse() may be called repeatedly.
        Raises ValueError on truncated input, IndexError on an unknown
        constant pool tag and Exception on an unknown element_value tag.
        """
        self.pos = 0
        self.end = len(self.buffer)
        self.root = Root()
        r = self.root
        r.magic = self.i4()
        r.minor_version = self.u2()
        r.major_version = self.u2()
        r.constant_pool = self.cp_info_list()
        r.access_flags = self.u2()
        r.this_class = self.u2()
        r.super_class = self.u2()
        r.interfaces = self.u2_list()
        r.fields = self.field_method_info_list()
        r.methods = self.field_method_info_list()
        r.attributes = self.attribute_info_list()
        return self.root

    def range_u2(self):
        """Read a u2 count and return ``range(count)``."""
        return range(self.u2())

    def u2_list(self):
        """Read a u2 count followed by that many u2 values."""
        ret = []
        for i in self.range_u2():
            ret.append(self.u2())
        return ret

    def exception_table(self):
        """Read one exception table entry of a Code attribute."""
        et = Exception_table()
        et.start_pc = self.u2()
        et.end_pc = self.u2()
        et.handler_pc = self.u2()
        et.catch_type = self.u2()
        return et

    # ------------------------------------------------------------------
    # constant pool
    # ------------------------------------------------------------------

    def cp_info_list(self):
        """Read constant_pool_count and the pool entries.

        The returned list holds ``constant_pool_count - 1`` items, so the
        entry with class-file index ``i`` is ``ret[i - 1]``.  A Long or Double
        constant takes up two indices; the unusable second index is filled
        with a placeholder Cp_info whose ``tag`` is 0 so that later indices
        stay aligned.
        """
        count = self.u2()
        ret = []
        index = 1
        while index < count:
            cp = self.cp_info()
            ret.append(cp)
            index += 1
            if (cp.tag == 5 or cp.tag == 6) and index < count:
                filler = Cp_info()
                filler.tag = 0
                ret.append(filler)
                index += 1
        return ret

    def cp_info(self):
        """Read one constant pool entry.

        Raises IndexError when the tag byte is not a known constant type.
        """
        tag = self.u1()
        cp = Cp_info()
        cp.tag = tag
        t = ''
        try:
            t = JavaClassFile.CONSTANT_POOL_TYPE[tag]
        except KeyError:
            raise IndexError('bad cp_info tag value')

        if (t == 'CONSTANT_Class' or t == 'CONSTANT_Module'
                or t == 'CONSTANT_Package'):
            cp.name_index = self.u2()
        elif (t == 'CONSTANT_Fieldref' or t == 'CONSTANT_Methodref'
                or t == 'CONSTANT_InterfaceMethodref'):
            cp.class_index = self.u2()
            cp.name_and_type_index = self.u2()
        elif t == 'CONSTANT_String':
            cp.string_index = self.u2()
        elif t == 'CONSTANT_Integer' or t == 'CONSTANT_Float':
            cp.bytes = self.u4()
        elif t == 'CONSTANT_Long' or t == 'CONSTANT_Double':
            cp.high_bytes = self.u4()
            cp.low_bytes = self.u4()
        elif t == 'CONSTANT_NameAndType':
            cp.name_index = self.u2()
            cp.descriptor_index = self.u2()
        elif t == 'CONSTANT_Utf8':
            cp.utf8 = self.utf8_list()
        elif t == 'CONSTANT_MethodHandle':
            cp.reference_kind = self.u1()
            cp.reference_index = self.u2()
        elif t == 'CONSTANT_MethodType':
            cp.descriptor_index = self.u2()
        elif t == 'CONSTANT_Dynamic' or t == 'CONSTANT_InvokeDynamic':
            cp.bootstrap_method_attr_index = self.u2()
            cp.name_and_type_index = self.u2()
        else:
            # Only reachable if the table and this chain get out of sync.
            raise Exception('tag of cp_info is bad.')
        return cp

    def utf8_list(self):
        """Read a u2 length and return that many raw bytes (not decoded).

        Raises ValueError if fewer bytes than announced are available.
        """
        length = self.u2()
        start = self.pos
        stop = start + length
        if start < 0 or stop > self.end:
            raise ValueError('unexpected end of data')
        self.pos = stop
        return self.buffer[start:stop]

    # ------------------------------------------------------------------
    # fields, methods, attributes
    # ------------------------------------------------------------------

    def field_method_info_list(self):
        """Read a u2 count followed by that many field/method entries."""
        ret = []
        for i in self.range_u2():
            ret.append(self.field_method_info())
        return ret

    def field_method_info(self):
        """Read one field_info / method_info entry including its attributes."""
        fm = Field_method_info()
        fm.access_flags = self.u2()
        fm.name_index = self.u2()
        fm.descriptor_index = self.u2()
        fm.attributes = self.attribute_info_list()
        return fm

    def attribute_info_list(self):
        """Read a u2 count followed by that many attributes."""
        ret = []
        for i in self.range_u2():
            ret.append(self.attribute_info())
        return ret

    def attribute_info(self):
        """Read one attribute and decode its body according to its name.

        Must run while parse() is in progress: the name is looked up in
        ``self.root.constant_pool``.  While the body is decoded the readable
        window is narrowed to the declared attribute length; afterwards the
        cursor is placed just behind the attribute no matter how much of the
        body was consumed.  Attributes that are unknown, or whose name index
        does not refer to a Utf8 constant, keep their raw body in ``info``.
        """
        at = Attribute_info()
        at.attribute_name_index = self.u2()
        attribute_length = self.u4()

        outer_end = self.end
        body_end = self.pos + attribute_length
        if body_end > outer_end:
            # An over-long length is clamped to the available data.
            body_end = outer_end
        self.end = body_end

        pool = self.root.constant_pool
        name_index = at.attribute_name_index
        if (name_index < 1 or name_index > len(pool)
                or pool[name_index - 1].tag != 1):
            print('attribute_name_index error')
            print(name_index)
            at.info = self._remaining()
            self.pos = body_end
            self.end = outer_end
            return at
        an = pool[name_index - 1].utf8

        if an == b'ConstantValue':
            at.constantvalue_index = self.u2()
        elif an == b'Code':
            at.max_stack = self.u2()
            at.max_locals = self.u2()
            at.code = self.code_list()
            at.exception_table = self.exception_table_list()
            at.attributes = self.attribute_info_list()
        elif an == b'Exceptions':
            at.exception_index_table = self.u2_list()
        elif an == b'InnerClasses':
            at.classes = self.classes_list()
        elif an == b'EnclosingMethod':
            at.class_index = self.u2()
            at.method_index = self.u2()
        elif an == b'Synthetic':
            pass
        elif an == b'Signature':
            at.signature_index = self.u2()
        elif an == b'SourceFile':
            at.sourcefile_index = self.u2()
        elif an == b'SourceDebugExtension':
            at.debug_extension = self._remaining()
        elif an == b'LineNumberTable':
            at.line_number_table = self.line_number_table_list()
        elif an == b'LocalVariableTable':
            at.local_variable_table = self.local_variable_table_list()
        elif an == b'LocalVariableTypeTable':
            at.local_variable_type_table = self.local_variable_type_table_list()
        elif an == b'Deprecated':
            pass
        elif (an == b'RuntimeVisibleAnnotations'
                or an == b'RuntimeInvisibleAnnotations'):
            at.annotations = self.annotation_list()
        elif (an == b'RuntimeVisibleParameterAnnotations'
                or an == b'RuntimeInvisibleParameterAnnotations'):
            # One list of annotations per formal parameter.
            at.parameter_annotations = self.parameter_annotations_list()
        elif an == b'AnnotationDefault':
            at.default_value = self.element_value()
        elif an == b'StackMapTable':
            print('warning: StackMapTable not supported')
            at.info = self._remaining()
        else:
            print('unknown attribute')
            at.info = self._remaining()

        # Skip whatever part of the body was not consumed above.
        self.pos = body_end
        self.end = outer_end
        return at

    def code_list(self):
        """Read a u4 length followed by that many bytecode bytes (as ints)."""
        ret = []
        for i in range(self.u4()):
            ret.append(self.code_array())
        return ret

    def code_array(self):
        """Read a single bytecode byte."""
        return self.u1()

    def exception_table_list(self):
        """Read a u2 count followed by that many exception table entries."""
        ret = []
        for i in self.range_u2():
            ret.append(self.exception_table())
        return ret

    def classes_list(self):
        """Read a u2 count followed by that many InnerClasses entries."""
        ret = []
        for i in self.range_u2():
            ret.append(self.classes())
        return ret

    def classes(self):
        """Read one InnerClasses entry."""
        c = Classes()
        c.inner_class_info_index = self.u2()
        c.outer_class_info_index = self.u2()
        c.inner_name_index = self.u2()
        c.inner_class_access_flags = self.u2()
        return c

    def line_number_table_list(self):
        """Read a u2 count followed by that many LineNumberTable entries."""
        ret = []
        for i in self.range_u2():
            ret.append(self.line_number_table())
        return ret

    def line_number_table(self):
        """Read one LineNumberTable entry."""
        ln = Line_number_table()
        ln.start_pc = self.u2()
        ln.line_number = self.u2()
        return ln

    def local_variable_table_list(self):
        """Read a u2 count followed by that many LocalVariableTable entries."""
        ret = []
        for i in self.range_u2():
            ret.append(self.local_variable_table())
        return ret

    def local_variable_table(self):
        """Read one LocalVariableTable entry."""
        lv = Local_variable_table()
        lv.start_pc = self.u2()
        lv.length = self.u2()
        lv.name_index = self.u2()
        lv.descriptor_index = self.u2()
        lv.index = self.u2()
        return lv

    def local_variable_type_table_list(self):
        """Read a u2 count followed by that many LocalVariableTypeTable entries."""
        ret = []
        for i in self.range_u2():
            ret.append(self.local_variable_type_table())
        return ret

    def local_variable_type_table(self):
        """Read one LocalVariableTypeTable entry."""
        lv = Local_variable_type_table()
        lv.start_pc = self.u2()
        lv.length = self.u2()
        lv.name_index = self.u2()
        lv.signature_index = self.u2()
        lv.index = self.u2()
        return lv

    # ------------------------------------------------------------------
    # annotations
    # ------------------------------------------------------------------

    def annotation_list(self):
        """Read a u2 count followed by that many annotations."""
        ret = []
        for i in self.range_u2():
            ret.append(self.annotation())
        return ret

    def parameter_annotations_list(self):
        """Read a u1 parameter count, then one annotation list per parameter."""
        ret = []
        for i in range(self.u1()):
            ret.append(self.annotation_list())
        return ret

    def annotation(self):
        """Read one annotation."""
        at = Annotation()
        at.type_index = self.u2()
        at.element_value_pairs = self.element_value_pairs_list()
        return at

    def element_value_pairs_list(self):
        """Read a u2 count followed by that many element/value pairs."""
        ret = []
        for i in self.range_u2():
            ret.append(self.element_value_pairs())
        return ret

    def element_value_pairs(self):
        """Read one element/value pair of an annotation."""
        ev = Element_value_pairs()
        ev.element_name_index = self.u2()
        ev.value = self.element_value()
        return ev

    def element_value_list(self):
        """Read a u2 count followed by that many element values."""
        ret = []
        for i in self.range_u2():
            ret.append(self.element_value())
        return ret

    def element_value(self):
        """Read one element_value, dispatching on its one-byte tag.

        Raises Exception (after printing the tag) when the tag is unknown.
        """
        ev = Element_value()
        et = self.c()
        const_value_index_use = [b'B', b'C', b'D', b'F', b'I', b'J', b'S',
                                 b'Z', b's']
        if et in const_value_index_use:
            ev.const_value_index = self.u2()
        elif et == b'e':
            ev.type_name_index = self.u2()
            ev.const_name_index = self.u2()
        elif et == b'c':
            ev.class_info_index = self.u2()
        elif et == b'@':
            ev.annotation_value = self.annotation()
        elif et == b'[':
            ev.array_value = self.element_value_list()
        else:
            print(et)
            raise Exception('tag of element_value is bad.')
        return ev


def entry_point(argv):
    """Parse the class file named by ``argv[1]`` and print its header summary.

    Returns 0 on success and 1 when no file name was supplied.
    """
    try:
        filename = argv[1]
    except IndexError:
        print("No source file")
        return 1

    classFile = JavaClassFile(filename)
    classFile.parse()
    print(classFile.root.to_string())
    return 0


def target(*args):
    """Return ``(entry_point, None)``; presumably the RPython translator hook."""
    return entry_point, None


if __name__ == "__main__":
    import sys
    entry_point(sys.argv)

Discussion

RPython is hell: - No map(). - Dictionaries and lists are type-restricted. - No generic programming. (Look at all the duplicated code in the foo_list() methods...) - No type declarations.

A meta-tracing JIT may be the way to go, but RPython obviously is not.

Comments