【JVM学习】Class文件解析
简介
最近为了加深对 Java 的理解,在复习 JVM。这部分内容大多是比较确定的、文档性质的知识,目前并没有特别的总结。但是看到类文件结构时手痒,想写一个解析 class 文件的解析器,那就简单记录一下吧。
JAVA类文件结构
详细内容不记了,太多了,有兴趣的同学看虚拟机相关书籍了解,这里只简单介绍下。
1 类文件使用类似结构体的伪结构,基本数据类型有两种:无符号数和表。无符号数有1、2、4、8四种长度;表可以由无符号数或其它复合类型作为元素,组成数据列表。
2 复合类型也只由上述两种基本类型组成。
3 类文件按固定结构存储,表类型例外,因为里面的元素可以是不同的复合类型,因此排列顺序不一定是固定顺序。
4 常量池。类名、函数名、字符串等信息,类文件的解析最终都要在常量池中找到对应的索引。不同的常量结构体用tag(数字)区分。
5 属性表。不管是field、method还是类本身,都有属性表,不同的属性结构体用属性名称(字符串)区分。
设计思路
做过协议解析的同学肯定了解,类似这种情况,一般都会使用TLV(type、length、value)结构解析,知道类型,才知道长度,知道长度才能完整读出该类型对应的数据。无论是协议,还是class、elf文件,这种方式都是常规操作。使用这种方式,除了定义完整的类型外,还要在解析中处理不同的类型。
那这次用的是JAVA,想换一个不一样的思路,即使用泛型和反射,类型的定义不可避免,那么是否可以减少解析过程中对于类型的处理?
基于这个想法,简单的按照类文件结构进行了相关结构体的定义,并实现了一版,直接上代码吧,我个人觉得基本达到了目的, 不算打印方法,250行左右,解析逻辑比较简单。
public class Parser {public Parser() {initConstantPoolMap();initAttributeMap();}/* 复合类型解析* @param obj 待解析填充的对象* @param inputStream 文件流*/public void parse(Object obj, DataInputStream inputStream) throws ClassNotFoundException, IllegalAccessException, IOException, InstantiationException {Field[] fields = obj.getClass().getDeclaredFields();for (Field field : fields) {System.out.println(field.getName() + " : " + field.getGenericType());field.setAccessible(true);Type type = field.getGenericType();Class fieldClass = null;Class[] actualClassArguments = new Class[2];if (type instanceof Class) {fieldClass = (Class) type;} else if (type instanceof ParameterizedType) {ParameterizedType parameterizedType = (ParameterizedType) type;fieldClass = (Class) parameterizedType.getRawType();Type[] actualTypeArguments = parameterizedType.getActualTypeArguments();actualClassArguments = new Class[actualTypeArguments.length];for (int i = 0; i < actualTypeArguments.length; i++) {actualClassArguments[i] = (Class) actualTypeArguments[i];}} else {throw new InstantiationException("illegal field type: " + field.getGenericType());}if (fieldClass.getSuperclass().equals(AtomicType.class)) {AtomicType o = parseAtomic(fieldClass, inputStream);field.set(obj, o);} else if (fieldClass.equals(Table.class)) {Table table = (Table) field.get(obj);table.setCountClass(actualClassArguments[0]);table.setElementClass(actualClassArguments[1]);parseTable(table, inputStream);} else {field.set(obj, parseMeta(fieldClass, inputStream));}}}/* 原子数据类型解析(类文件定义的四种无符号数据类型)* @param cl 带解析的类信息* @param inputStream 文件流* @return 解析后实例的的对象*/private AtomicType parseAtomic(Class<?> cl, DataInputStream inputStream) throws IllegalAccessException, InstantiationException, IOException {AtomicType obj = (AtomicType) cl.newInstance();inputStream.read(obj.getBytes());System.out.println(cl.getTypeName() + " parsed: " + obj.getValue());return obj;}/* 通用表结构解析* @param table 表对象* @param inputStream 文件流*/private void parseTable(Table table, 
DataInputStream inputStream) throws IllegalAccessException, InstantiationException, ClassNotFoundException, IOException {AtomicType count = parseAtomic(table.getCountClass(), inputStream);table.setCount(count);System.out.println("table count: " + count.getValue());if (count.getIntValue() == 0) {return;} else if (table.getElementClass().getSuperclass().equals(Number.class)) {parseNumber(table, inputStream);} else if (table.getElementClass().getTypeName().equals(cp_info.class.getTypeName())) {parseConstantPool(table, inputStream);} else if (table.getElementClass().getTypeName().equals(attribute_info.class.getTypeName())) {parseAttribute(table, inputStream);} else {parseMetas(table, inputStream);}}/* 其它复合类型的表结构解析* @param table 表对象* @param inputStream 文件流*/private void parseMetas(Table table, DataInputStream inputStream) throws ClassNotFoundException, IllegalAccessException, InstantiationException, IOException {for (int i = 0; i < table.getCount().getIntValue(); i++) {table.getData().add(parseMeta(table.getElementClass(), inputStream));}}/* 复合类型解析(根据类型实例化后填充)* @param cl 待解析类信息* @param inputStream 文件流* @return 解析后的实例*/private Object parseMeta(Class cl, DataInputStream inputStream) throws ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {Object o = cl.newInstance();parse(o, inputStream);return o;}/* 解析原子数据类型(非class文件定义的无符号数)* @param table 表对象* @param inputStream 文件流*/private void parseNumber(Table table, DataInputStream inputStream) throws IOException, InstantiationException {for (int i = 0; i < table.getCount().getIntValue(); i++) {if (table.getElementClass().equals(Byte.class)) {table.getData().add(inputStream.readByte());} else if (table.getElementClass().equals(Short.class)) {table.getData().add(inputStream.readShort());} else {throw new InstantiationException("illegal number class : " + table.getElementClass());}}}/* 解析常量池* @param table 表对象* @param inputStream 文件流*/private void parseConstantPool(Table table, DataInputStream 
inputStream) throws ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {for (int i = 0; i < table.getCount().getIntValue() - 1; i++) {table.getData().add(parseConstantPool(inputStream));}}/* 解析单个常量对象* @param inputStream 表对象* @return 常量对象*/private Object parseConstantPool(DataInputStream inputStream) throws IllegalAccessException, IOException, InstantiationException, ClassNotFoundException {AtomicType tag = parseAtomic(u1.class, inputStream);Class cl = constPoolMap.get(tag.getIntValue());if (cl == null) {throw new InstantiationException("not found const pool class: " + tag.getValue());}Object obj = cl.newInstance();parse(obj, inputStream);System.out.println("parse constant pool: tag = " + tag.getValue());return obj;}/* 解析属性表* @param table 表对象* @param inputStream 文件流*/private void parseAttribute(Table table, DataInputStream inputStream) throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException {for (int i = 0; i < table.getCount().getIntValue(); i++) {table.getData().add(parseAttribute(inputStream));}}/* 解析单个属性* @param inputStream 文件流* @return 单个属性对象*/private Object parseAttribute(DataInputStream inputStream) throws ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {attribute_info attr = new attribute_info();parse(attr, inputStream);String name = getUtf8((int) attr.getAttribute_name_index().getValue());if (name == null) {throw new InstantiationException("not found attribute name: " + attr.getAttribute_name_index().getValue());}System.out.println("parse attribute: " + name);Class cl = attributeMap.get(name);if (cl == null) {throw new InstantiationException("not found attribute: " + name);}return parseMeta(cl, inputStream);}/* 获取常量池中的对象* @param index 常量池索引* @return 常量对象*/private cp_info getConstant(int index) {if (index == 0 || index >= classInfo.getConstant_pool().getData().size()) {return null;}return classInfo.getConstant_pool().getData().get(index - 1);}/* 获取字符串对象* 
@param index 索引* @return 字符串*/private String getUtf8(int index) {cp_info c = getConstant(index);if (c == null) {return null;}if (!(c instanceof utf8_info)) {System.out.println("not utf8 constant");return null;}utf8_info utf8 = (utf8_info) c;return utf8.toString();}// 类文件对象private static class_info classInfo = new class_info();// 常量池类型映射表private static Map<Integer, Class> constPoolMap = new HashMap<>();// 属性映射表private static Map<String, Class> attributeMap = new HashMap<>();private static void initConstantPoolMap() {constPoolMap.put(1, utf8_info.class);constPoolMap.put(3, integer_info.class);constPoolMap.put(4, float_info.class);constPoolMap.put(5, long_info.class);constPoolMap.put(6, double_info.class);constPoolMap.put(7, constant_class_info.class);constPoolMap.put(8, string_info.class);constPoolMap.put(9, fieldref_info.class);constPoolMap.put(10, methodref_info.class);constPoolMap.put(11, interfacemethodref_info.class);constPoolMap.put(12, nametype_info.class);}private static void initAttributeMap() {attributeMap.put("Code", code_info.class);attributeMap.put("Exceptions", throws_info.class);attributeMap.put("LineNumberTable", LineNumberTable.class);attributeMap.put("LocalVariableTable", LocalVariableTable.class);attributeMap.put("SourceFile", SourceFile.class);attributeMap.put("ConstantValue", ConstantValue.class);attributeMap.put("InnerClasses", InnerClasses.class);}private static void print(Class cl, Object obj, String prefix, StringBuilder sb) throws IllegalAccessException, ClassNotFoundException, InstantiationException {Field[] fields = cl.getDeclaredFields();sb.append(prefix).append(cl.getSimpleName()).append(":{\\n");for (Field field : fields) {field.setAccessible(true);Type type = field.getGenericType();Class fieldClass = null;Class[] actualClassArguments = new Class[2];if (type instanceof Class) {fieldClass = (Class) type;} else if (type instanceof ParameterizedType) {ParameterizedType parameterizedType = (ParameterizedType) type;fieldClass = (Class) 
parameterizedType.getRawType();Type[] actualTypeArguments = parameterizedType.getActualTypeArguments();actualClassArguments = new Class[actualTypeArguments.length];for (int i = 0; i < actualTypeArguments.length; i++) {actualClassArguments[i] = (Class) actualTypeArguments[i];}} else {throw new InstantiationException("illegal field type: " + field.getGenericType());}Object o = field.get(obj);if (fieldClass.getSuperclass().equals(AtomicType.class) && field.getName().endsWith("_index")) {AtomicType v = (AtomicType) o;sb.append(prefix).append(field.getName() + ": #" + v.getValue() + " : ");printConstant(v.getIntValue(), sb);sb.append("\\n");} else if (fieldClass.getSuperclass().equals(AtomicType.class)) {AtomicType v = (AtomicType) o;sb.append(prefix + field.getName() + " : " + v.getValue()).append("\\n");} else if (fieldClass.equals(Table.class)) {Table table = (Table) o;sb.append(prefix).append(field.getName() + ": ").append("[\\n");for (int i = 0; i < table.getData().size(); i++) {Class fcl = table.getData().get(i).getClass();if (table.getElementClass().equals(cp_info.class)) {sb.append(prefix).append(" ").append("#" + (i + 1) + " = " + table.getData().get(i).toString()).append("\\n");} else if (fcl.equals(Byte.class) || fcl.equals(Short.class)) {sb.append(table.getData().get(i));} else {print(fcl, table.getData().get(i), prefix + " ", sb);}}if (table.getCount().getIntValue() > 0 &&(table.getElementClass().equals(Byte.class) || table.getElementClass().equals(Short.class))) {sb.append("\\n");}sb.append(prefix).append("]\\n");} else {sb.append(prefix).append(field.getName() + ": ").append("\\n");Class fcl = Class.forName(field.getGenericType().getTypeName());print(fcl, o, prefix + " ", sb);}}sb.append(prefix).append("}\\n");}private static void printConstant(int index, StringBuilder sb) throws IllegalAccessException {if (index == 0) {return;}cp_info info = classInfo.getConstant_pool().getData().get(index - 1);if (info instanceof utf8_info) 
{sb.append(info.toString());return;}sb.append(" -> " + info.getClass().getSimpleName() + " :");Field[] fields = info.getClass().getDeclaredFields();for (Field field : fields) {if (!field.getName().endsWith("_index")) {continue;}field.setAccessible(true);AtomicType o = (AtomicType) field.get(info);sb.append(field.getName() + " :#");sb.append(o.getValue() + " -> ");printConstant(o.getIntValue(), sb);sb.append("; ");}}public static void main(String[] args) throws IOException {Parser parser = new Parser();FileInputStream fileInputStream = null;try {fileInputStream = new FileInputStream("Bean.class");// Create a new DataInputStream object with the FileInputStream objectDataInputStream dataInputStream = new DataInputStream(fileInputStream);parser.parse(classInfo, dataInputStream);StringBuilder sb = new StringBuilder();print(class_info.class, classInfo, "", sb);System.out.println(sb.toString());} catch (Exception e) {e.printStackTrace();} finally {fileInputStream.close();}}}
再补充几个结构体定义的示例:
/**
 * A counted list as used throughout the class-file format: a count value followed by elements.
 *
 * <p>The count and element classes are stored explicitly because generic type arguments are
 * erased at runtime; the parser sets them from the declaring field before reading.
 *
 * @param <N> type of the count value
 * @param <T> type of the elements
 */
public class Table<N extends AtomicType<?>, T> {
    private N count;
    private LinkedList<T> data = new LinkedList<>();
    private Class<?> countClass;
    private Class<?> elementClass;

    public Table() {
    }

    /** @return the count as read from the file, or {@code null} before parsing */
    public N getCount() {
        return count;
    }

    public void setCount(N count) {
        this.count = count;
    }

    /** @return the live, mutable element list */
    public LinkedList<T> getData() {
        return data;
    }

    public void setData(LinkedList<T> data) {
        this.data = data;
    }

    public Class getCountClass() {
        return countClass;
    }

    public Class getElementClass() {
        return elementClass;
    }

    public void setCountClass(Class countClass) {
        this.countClass = countClass;
    }

    public void setElementClass(Class elementClass) {
        this.elementClass = elementClass;
    }

    /** Shows the numeric count rather than the count object's identity. */
    @Override
    public String toString() {
        return "count: " + (count == null ? null : count.getValue());
    }
}

/**
 * Base class for the fixed-width unsigned numbers of the class-file format.
 *
 * <p>Subclasses allocate a byte buffer of the right width; the parser fills it from the stream
 * and the numeric value is decoded lazily on first access.
 *
 * @param <T> boxed number type holding the decoded value
 */
public abstract class AtomicType<T extends Number> {
    private int length;
    private byte[] bytes;
    private T value;

    public int getLength() {
        return length;
    }

    public void setLength(int length) {
        this.length = length;
    }

    /** @return the decoded value, decoding from the byte buffer on first use */
    public T getValue() {
        if (value == null) {
            parse();
        }
        return value;
    }

    public void setValue(T value) {
        this.value = value;
    }

    /** @return the raw byte buffer; the parser reads file bytes directly into it */
    public byte[] getBytes() {
        return bytes;
    }

    /** Replaces the byte buffer and drops any value decoded from the previous one. */
    public void setBytes(byte[] bytes) {
        this.bytes = bytes;
        this.value = null;
    }

    /** Decodes the byte buffer and stores the result via {@link #setValue}. */
    public abstract void parse();

    /** @return the value as an {@code int} */
    public int getIntValue() {
        return getValue().intValue();
    }
}

/**
 * Two-byte big-endian unsigned number.
 *
 * <p>The value is stored in a {@code Short}, so {@link #getValue()} is negative for values above
 * 0x7FFF; use {@link #getIntValue()} for the unsigned value in the range 0..65535.
 */
public class u2 extends AtomicType<Short> {
    public u2() {
        setLength(2);
        setBytes(new byte[2]);
    }

    @Override
    public void parse() {
        byte[] raw = getBytes();
        // Mask each byte: Java bytes are signed, and an unmasked low byte >= 0x80 would
        // sign-extend and corrupt the high byte.
        int unsigned = ((raw[0] & 0xFF) << 8) | (raw[1] & 0xFF);
        setValue((short) unsigned);
    }

    /** @return the value interpreted as unsigned, in the range 0..65535 */
    @Override
    public int getIntValue() {
        return getValue() & 0xFFFF;
    }
}
/**
 * Common header shared by every attribute in a class file.
 *
 * <p>The fields are filled by reflection, so their names and declaration order are part of the
 * on-disk layout and must not be changed.
 */
public class attribute_info {

    /** Constant-pool index of the attribute's name. */
    public u2 attribute_name_index;

    /** Length field of the attribute; per the class-file format, the byte size of the body that follows. */
    public u4 attribute_length;

    /** @return the constant-pool index of the attribute's name */
    public u2 getAttribute_name_index() {
        return this.attribute_name_index;
    }
}
/**
 * Base type for constant-pool entries.
 *
 * <p>NOTE(review): concrete constants (utf8_info, long_info, ...) presumably extend this class;
 * their definitions are not shown here.
 */
public class cp_info implements Printable {

    /** Tag byte that identifies which kind of constant this entry is. */
    public u1 tag;
}
/**
 * Top-level layout of a class file.
 *
 * <p>The fields are filled by reflection in declaration order, so that order mirrors the order
 * of the items in the file and must not be changed. Table fields are pre-instantiated because
 * the parser fills the existing table object in place.
 */
public class class_info {

    public u4 magic;
    public u2 minor_version;
    public u2 major_version;

    /** Constant pool; referenced everywhere else by index. */
    public Table<u2, cp_info> constant_pool = new Table<>();

    public u2 access_flags;
    public u2 this_class;
    public u2 super_class;

    /** Implemented interfaces, stored as plain two-byte numbers. */
    public Table<u2, Short> interfaces = new Table<>();

    public Table<u2, field_info> fields = new Table<>();
    public Table<u2, method_info> methods = new Table<>();
    public Table<u2, attribute_info> attributes = new Table<>();

    /** @return the constant-pool table */
    public Table<u2, cp_info> getConstant_pool() {
        return this.constant_pool;
    }
}
总结
不算完美,阶段性成果吧。关于泛型,值得单独写一篇记录一下。