string references resolved, pyelftools integration

rscloura · Dec 7, 2020 · 783d1a6 · 783d1a6
1 parent 7352b9f
commit 783d1a6
Show file tree

Hide file tree

Showing 4 changed files with 203 additions and 89 deletions.
diff --git a/Cluster.py b/Cluster.py
@@ -1,12 +1,25 @@
 import Constants
-
+from struct import unpack
 from ClassId import ClassId
 from Kind import Kind
 import TypedData
 from UnboxedFieldBitmap import UnboxedFieldBitmap
 from Utils import DecodeUtils, NumericUtils, StreamUtils, isTopLevelCid
 
 def getDeserializerForCid(includesCode, cid):
+	# Abstract base for the read-only data deserializers (class IDs 21, 22, 23, 81, 82 when code is included); subclasses provide getObjectAt
+	class RODataDeserializer():
+		def readAlloc(self, snapshot):
+			count = StreamUtils.readUnsigned(snapshot.stream)
+			runningOffset = 0
+			for _ in range(count):
+				runningOffset += StreamUtils.readUnsigned(snapshot.stream) << Constants.kObjectAlignmentLog2
+				snapshot.rodata.seek(runningOffset)
+				snapshot.assignRef(self.getObjectAt(snapshot))
+
+		def readFill(self, snapshot):
+			return
+
 	# Class ID: 4
 	class ClassDeserializer():
 		def readAlloc(self, snapshot):
@@ -53,6 +66,7 @@ def readFill(self, snapshot):
 					StreamUtils.readUnsigned(snapshot.stream, 64)
 
 				snapshot.references[refId] = classPtr
+				snapshot.classes.append(classPtr)
 
 			for refId in range(self.startIndex, self.stopIndex):
 				classPtr = self._readFromTo(snapshot)
@@ -80,6 +94,7 @@ def readFill(self, snapshot):
 					snapshot.unboxedFieldsMapAt[classId] = UnboxedFieldBitmap(StreamUtils.readUnsigned(snapshot.stream, 64))
 
 				snapshot.references[refId] = classPtr
+				snapshot.classes.append(classPtr)
 
 		def _readFromTo(self, snapshot):
 			classPtr = { }
@@ -93,7 +108,7 @@ def _readFromTo(self, snapshot):
 			classPtr['script'] = StreamUtils.readUnsigned(snapshot.stream)
 			classPtr['library'] = StreamUtils.readUnsigned(snapshot.stream)
 			classPtr['typeParameters'] = StreamUtils.readUnsigned(snapshot.stream)
-			classPtr['superType'] = StreamUtils.readUnsigned(snapshot.stream)
+			classPtr['superType'] = snapshot.references[StreamUtils.readUnsigned(snapshot.stream)]
 			classPtr['signatureFunction'] = StreamUtils.readUnsigned(snapshot.stream)
 			classPtr['constants'] = StreamUtils.readUnsigned(snapshot.stream)
 			classPtr['declarationType'] = StreamUtils.readUnsigned(snapshot.stream)
@@ -484,36 +499,40 @@ def readFill(self, snapshot):
 
 				snapshot.references[refId] = poolPtr
 
-	# Class ID: 21
-	class PcDescriptorsDeserializer():
-		def readAlloc(self, snapshot):
-			self.startIndex = snapshot.nextRefIndex
-			count = StreamUtils.readUnsigned(snapshot.stream)
-			for _ in range(count):
-				length = StreamUtils.readUnsigned(snapshot.stream)
-				snapshot.assignRef('pc descriptors')
-			self.stopIndex = snapshot.nextRefIndex
-
-		def readFill(self, snapshot):
-			for refId in range(self.startIndex, self.stopIndex):
-				length = StreamUtils.readUnsigned(snapshot.stream)
-				descPtr = { }
-				descPtr['length'] = length
-				descPtr['data'] = snapshot.stream.read(length)
-
-				snapshot.references[refId] = descPtr
-
-	# Aggregate deserializer for class IDs: 22, 23, 81, 82
-	class RODataDeserializer():
-		def readAlloc(self, snapshot):
-			count = StreamUtils.readUnsigned(snapshot.stream)
-			runningOffset = 0
-			for _ in range(count):
-				runningOffset += StreamUtils.readUnsigned(snapshot.stream) << Constants.kObjectAlignmentLog2
-				snapshot.assignRef('ro data object')
-
-		def readFill(self, snapshot):
-			return
+	if includesCode:
+		# Class ID: 21
+		class PcDescriptorsDeserializer(RODataDeserializer):
+			def getObjectAt(self, stream):
+				return 'pc descriptor'
+
+		# Class ID: 22
+		class CodeSourceMapDeserializer(RODataDeserializer):
+			def getObjectAt(self, stream):
+				return 'code source map'
+
+		# Class ID: 23
+		class CompressedStackMapsDeserializer(RODataDeserializer):
+			def getObjectAt(self, stream):
+				return 'compressed stack maps'
+	else:
+		# Class ID: 21
+		class PcDescriptorsDeserializer():
+			def readAlloc(self, snapshot):
+				self.startIndex = snapshot.nextRefIndex
+				count = StreamUtils.readUnsigned(snapshot.stream)
+				for _ in range(count):
+					length = StreamUtils.readUnsigned(snapshot.stream)
+					snapshot.assignRef('pc descriptors')
+				self.stopIndex = snapshot.nextRefIndex
+
+			def readFill(self, snapshot):
+				for refId in range(self.startIndex, self.stopIndex):
+					length = StreamUtils.readUnsigned(snapshot.stream)
+					descPtr = { }
+					descPtr['length'] = length
+					descPtr['data'] = snapshot.stream.read(length)
+
+					snapshot.references[refId] = descPtr
 
 	# Class ID: 25
 	class ExceptionHandlersDeserializer():
@@ -872,36 +891,51 @@ def readFill(self, snapshot):
 
 				snapshot.references[refId] = arrayPtr
 
-	# Class ID: 81
-	class OneByteStringDeserializer():
-		def readAlloc(self, snapshot):
-			self.startIndex = snapshot.nextRefIndex
-			count = StreamUtils.readUnsigned(snapshot.stream)
-			for _ in range(count):
-				length = StreamUtils.readUnsigned(snapshot.stream)
-				snapshot.assignRef('one byte string')
-			self.stopIndex = snapshot.nextRefIndex
-
-		def readFill(self, snapshot):
-			for refId in range(self.startIndex, self.stopIndex):
-				length = StreamUtils.readUnsigned(snapshot.stream)
-				StreamUtils.readBool(snapshot.stream) # Canonicalization plays no role in parsing
-				strPtr = { }
-				strPtr['hash'] = StreamUtils.readInt(snapshot.stream, 32)
-				strPtr['length'] = length
-				strPtr['data'] = ''.join(chr(x) for x in snapshot.stream.read(length))
-
-				snapshot.references[refId] = strPtr
-
-	# Class ID: 82
-	class TwoByteStringDeserializer():
-		def readAlloc(self, snapshot):
-			self.startIndex = snapshot.nextRefIndex
-			count = StreamUtils.readUnsigned(snapshot.stream)
-			for _ in range(count):
-				length = StreamUtils.readUnsigned(snapshot.stream)
-				snapshot.assignRef('two-byte string')
-			self.stopIndex = snapshot.nextRefIndex
+	# String clusters: with code the objects are located through snapshot.rodata, otherwise they are read inline from snapshot.stream
+	if includesCode:
+		# Class ID: 81
+		class OneByteStringDeserializer(RODataDeserializer):
+			def getObjectAt(self, snapshot):
+				stream = snapshot.rodata
+				tags, hash_, length = unpack('<LLQ', stream.read(16))
+				return "".join(chr(x) for x in stream.read(length // 2))
+
+		# Class ID: 82
+		class TwoByteStringDeserializer(RODataDeserializer):
+			def getObjectAt(self, stream):
+				return 'two-byte string'
+
+	else:
+		# Class ID: 81
+		class OneByteStringDeserializer():
+			def readAlloc(self, snapshot):
+				self.startIndex = snapshot.nextRefIndex
+				count = StreamUtils.readUnsigned(snapshot.stream)
+				for _ in range(count):
+					length = StreamUtils.readUnsigned(snapshot.stream)
+					snapshot.assignRef('one byte string')
+				self.stopIndex = snapshot.nextRefIndex
+
+			def readFill(self, snapshot):
+				for refId in range(self.startIndex, self.stopIndex):
+					length = StreamUtils.readUnsigned(snapshot.stream)
+					StreamUtils.readBool(snapshot.stream) # Canonicalization plays no role in parsing
+					strPtr = { }
+					strPtr['hash'] = StreamUtils.readInt(snapshot.stream, 32)
+					strPtr['length'] = length
+					strPtr['data'] = ''.join(chr(x) for x in snapshot.stream.read(length))
+
+					snapshot.references[refId] = strPtr
+
+		# Class ID: 82
+		class TwoByteStringDeserializer():
+			def readAlloc(self, snapshot):
+				self.startIndex = snapshot.nextRefIndex
+				count = StreamUtils.readUnsigned(snapshot.stream)
+				for _ in range(count):
+					length = StreamUtils.readUnsigned(snapshot.stream)
+					snapshot.assignRef('two-byte string')
+				self.stopIndex = snapshot.nextRefIndex
 
 	# Aggregate deserializer for class IDs: 108, 111, 114, 117, 120, 123, 126, 129, 132, 135, 138, 141, 144, 147
 	class TypedDataDeserializer():
@@ -939,12 +1002,6 @@ def readFill(self, snapshot):
 	if ClassId.isTypedDataClass(cid):
 		return TypedDataDeserializer(cid)
 
-	if includesCode:
-		if ClassId(cid) is ClassId.PC_DESCRIPTORS or ClassId(cid) is ClassId.CODE_SOURCE_MAP or \
-		ClassId(cid) is ClassId.COMPRESSED_STACK_MAPS or ClassId(cid) is ClassId.ONE_BYTE_STRING or \
-		ClassId(cid) is ClassId.TWO_BYTE_STRING:
-			return RODataDeserializer()
-
 	if ClassId(cid) is ClassId.ILLEGAL:
 		raise Exception('Encountered illegal cluster')
 	if ClassId(cid) is ClassId.CLASS:
@@ -970,9 +1027,9 @@ def readFill(self, snapshot):
 	if ClassId(cid) is ClassId.PC_DESCRIPTORS:
 		return PcDescriptorsDeserializer()
 	if ClassId(cid) is ClassId.CODE_SOURCE_MAP:
-		return RODataDeserializer()
+		return CodeSourceMapDeserializer()
 	if ClassId(cid) is ClassId.COMPRESSED_STACK_MAPS:
-		return RODataDeserializer()
+		return CompressedStackMapsDeserializer()
 	if ClassId(cid) is ClassId.EXCEPTION_HANDLERS:
 		return ExceptionHandlersDeserializer()
 	if ClassId(cid) is ClassId.UNLINKED_CALL:

diff --git a/Constants.py b/Constants.py
@@ -33,4 +33,11 @@
 kNativeFunction = 2
 
 kNullabilityBitSize = 2
-kNullabilityBitMask = 3
+kNullabilityBitMask = 3
+
+kAppAOTSymbols = [
+    '_kDartVmSnapshotData',
+    '_kDartVmSnapshotInstructions',
+    '_kDartIsolateSnapshotData',
+    '_kDartIsolateSnapshotInstructions'
+]
diff --git a/Snapshot.py b/Snapshot.py
@@ -15,12 +15,15 @@ class Snapshot:
 	# hash = version hash (32 byte string)
 	# features = string array of features
 
-	def __init__(self, snapshot):
+	def __init__(self, data, dataOffset, instructions, instructionsOffset, base=None):
+		self.stream = BytesIO(data)
+
 		# Header
-		self.stream = BytesIO(snapshot)
 		self.magic = int.from_bytes(self.stream.read(Constants.kMagicSize), 'little')
 		self.size = int.from_bytes(self.stream.read(Constants.kLengthSize), 'little')
 		self.kind = Kind(int.from_bytes(self.stream.read(Constants.kKindSize), 'little'))
+		self.rodataOffset = NumericUtils.roundUp(self.size + Constants.kMagicSize, Constants.kMaxObjectAlignment)
+		self.rodata = BytesIO(self.stream.getbuffer()[self.rodataOffset:])
 		self.hash = self.stream.read(Constants.hashSize).decode('UTF-8')
 		self.features = list(map(lambda x: x.decode('UTF-8'), StreamUtils.readString(self.stream).split(b'\x20')))
 
@@ -53,17 +56,23 @@ def __init__(self, snapshot):
 		# Initialize references
 		self.references = ['INVALID'] # Reference count starts at 1
 		self.nextRefIndex = 1
-		self.addBaseObjects()
-		for _ in range(len(self.references), self.numBaseObjects):
-			self.assignRef('UNKNOWN') # Allocate missing references
+
+		# Initialize classes
+		self.classes = [ ]
+
+		if base is not None:
+			self.references = base.references
+			self.nextRefIndex = base.nextRefIndex
+		else:
+			self.addBaseObjects()
 
 		self.unboxedFieldsMapAt = { }
 
-		assert(len(self.references) == self.numBaseObjects)
+		assert(len(self.references) - 1 == self.numBaseObjects) # Reference count starts at 1
 
 		self.clusters = [ self.readClusterAlloc() for _ in range(self.numClusters) ]
 
-		assert(len(self.references) == self.numObjects)
+		assert(len(self.references) - 1 == self.numObjects) # Reference count starts at 1
 
 		for cluster in self.clusters:
 			cluster.readFill(self)
@@ -111,8 +120,6 @@ def addBaseObjects(self):
 
 	def readRoots(self):
 		self.symbolTable = StreamUtils.readRef(self.stream)
-		print(self.symbolTable)
-		print(self.references[47809])
 
 	def assignRef(self, obj):
 		self.references.append(obj)
@@ -181,4 +188,32 @@ def getSummary(self):
 		prettyString += 'Clusters count: ' + str(self.getNumClusters()) + '\n'
 		prettyString += 'Field table length: ' + str(self.getFieldTableLength()) + '\n'
 		prettyString += 'Data image offset: ' + str(self.getDataImageOffset())
-		return prettyString
+		return prettyString
+
+	# WIP: resolves the name, functions and interfaces of the class named 'MyApp'.
+	# The matching entry of self.classes is updated in place and returned;
+	# IndexError is raised when no class matches.
+	def getClasses(self):
+		clazz = list(filter(lambda x: self.references[x['name']] == 'MyApp', self.classes))[0]
+		clazz['name'] = self.references[clazz['name']]
+		clazz['functions'] = list(map(lambda f: self._getFunction(f), self.references[clazz['functions']]['data']))
+		# The lambda's own parameter is the value forwarded to _getInterface.
+		clazz['interfaces'] = list(map(lambda i: self._getInterface(i), self.references[clazz['interfaces']]['data']))
+
+		return clazz
+
+	# Builds a dict with the name resolved via self.references and the stored 'resultType' of reference f.
+	def _getFunction(self, f):
+		function = { }
+		function['name'] = self.references[self.references[f]['name']]
+		function['resultType'] = self.references[f]['resultType']
+
+		return function
+
+	# WIP: returns an empty placeholder dict for interface reference i.
+	def _getInterface(self, i):
+		# TODO: fill in the interface fields; the bare interface['data'] lookup
+		# that used to be here raised KeyError on every call.
+		interface = { }
+
+		return interface