[go: up one dir, main page]

Skip to content

Commit

Permalink
string references resolved, pyelftools integration
Browse files Browse the repository at this point in the history
  • Loading branch information
rscloura committed Dec 7, 2020
1 parent 7352b9f commit 783d1a6
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 89 deletions.
197 changes: 127 additions & 70 deletions Cluster.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,25 @@
import Constants

from struct import unpack
from ClassId import ClassId
from Kind import Kind
import TypedData
from UnboxedFieldBitmap import UnboxedFieldBitmap
from Utils import DecodeUtils, NumericUtils, StreamUtils, isTopLevelCid

def getDeserializerForCid(includesCode, cid):
# Abstract deserializer for class IDs: 21, 22, 23, 81, 82
class RODataDeserializer():
    """Base class for clusters whose objects are located in the read-only data image.

    Subclasses override getObjectAt() to decode one object; readAlloc() walks
    the cluster's offset table and registers one reference per object.
    """

    def readAlloc(self, snapshot):
        """Register one reference for every object listed in the cluster.

        Reads an object count from snapshot.stream, then one unsigned value per
        object. Each value is shifted left by Constants.kObjectAlignmentLog2
        and accumulated, so the stream stores deltas rather than absolute
        offsets. snapshot.rodata is positioned at the accumulated offset before
        getObjectAt() is asked to decode the object.
        """
        count = StreamUtils.readUnsigned(snapshot.stream)
        runningOffset = 0
        for _ in range(count):
            runningOffset += StreamUtils.readUnsigned(snapshot.stream) << Constants.kObjectAlignmentLog2
            snapshot.rodata.seek(runningOffset)
            snapshot.assignRef(self.getObjectAt(snapshot))

    def readFill(self, snapshot):
        """Do nothing: objects are fully produced during readAlloc()."""
        return

    def getObjectAt(self, snapshot):
        """Decode the object at the current position of snapshot.rodata.

        Must be overridden. Raising here turns accidental use of the abstract
        base into an explicit error instead of an AttributeError from
        readAlloc().
        """
        raise NotImplementedError('getObjectAt must be implemented by subclasses of RODataDeserializer')

# Class ID: 4
class ClassDeserializer():
def readAlloc(self, snapshot):
Expand Down Expand Up @@ -53,6 +66,7 @@ def readFill(self, snapshot):
StreamUtils.readUnsigned(snapshot.stream, 64)

snapshot.references[refId] = classPtr
snapshot.classes.append(classPtr)

for refId in range(self.startIndex, self.stopIndex):
classPtr = self._readFromTo(snapshot)
Expand Down Expand Up @@ -80,6 +94,7 @@ def readFill(self, snapshot):
snapshot.unboxedFieldsMapAt[classId] = UnboxedFieldBitmap(StreamUtils.readUnsigned(snapshot.stream, 64))

snapshot.references[refId] = classPtr
snapshot.classes.append(classPtr)

def _readFromTo(self, snapshot):
classPtr = { }
Expand All @@ -93,7 +108,7 @@ def _readFromTo(self, snapshot):
classPtr['script'] = StreamUtils.readUnsigned(snapshot.stream)
classPtr['library'] = StreamUtils.readUnsigned(snapshot.stream)
classPtr['typeParameters'] = StreamUtils.readUnsigned(snapshot.stream)
classPtr['superType'] = StreamUtils.readUnsigned(snapshot.stream)
classPtr['superType'] = snapshot.references[StreamUtils.readUnsigned(snapshot.stream)]
classPtr['signatureFunction'] = StreamUtils.readUnsigned(snapshot.stream)
classPtr['constants'] = StreamUtils.readUnsigned(snapshot.stream)
classPtr['declarationType'] = StreamUtils.readUnsigned(snapshot.stream)
Expand Down Expand Up @@ -484,36 +499,40 @@ def readFill(self, snapshot):

snapshot.references[refId] = poolPtr

# Class ID: 21
class PcDescriptorsDeserializer():
    """Deserializer for PC descriptor clusters stored inline in the snapshot stream."""

    def readAlloc(self, snapshot):
        """Reserve one placeholder reference per descriptor.

        Records the reference index range [startIndex, stopIndex) covered by
        this cluster so readFill() can replace the placeholders later.
        """
        self.startIndex = snapshot.nextRefIndex
        remaining = StreamUtils.readUnsigned(snapshot.stream)
        while remaining > 0:
            # The per-object length is consumed only to advance the stream.
            StreamUtils.readUnsigned(snapshot.stream)
            snapshot.assignRef('pc descriptors')
            remaining -= 1
        self.stopIndex = snapshot.nextRefIndex

    def readFill(self, snapshot):
        """Replace every placeholder with a dict holding the length and raw bytes."""
        for refId in range(self.startIndex, self.stopIndex):
            size = StreamUtils.readUnsigned(snapshot.stream)
            snapshot.references[refId] = {
                'length': size,
                'data': snapshot.stream.read(size),
            }

# Aggregate deserializer for class IDs: 22, 23, 81, 82
class RODataDeserializer():
    """Deserializer that registers a placeholder for every read-only data object."""

    def readAlloc(self, snapshot):
        """Consume the cluster's offset table and assign one placeholder per entry.

        The accumulated offset is computed but not used to locate the object;
        every reference is the literal string 'ro data object'.
        """
        remaining = StreamUtils.readUnsigned(snapshot.stream)
        offset = 0
        while remaining > 0:
            delta = StreamUtils.readUnsigned(snapshot.stream)
            offset = offset + (delta << Constants.kObjectAlignmentLog2)
            snapshot.assignRef('ro data object')
            remaining -= 1

    def readFill(self, snapshot):
        """Do nothing: this cluster has no fill section to read."""
        return None
if includesCode:
    # Class ID: 21
    class PcDescriptorsDeserializer(RODataDeserializer):
        """PC descriptors handled through the RODataDeserializer base."""

        def getObjectAt(self, stream):
            # Placeholder label only; nothing is read from the argument.
            label = 'pc descriptor'
            return label

    # Class ID: 22
    class CodeSourceMapDeserializer(RODataDeserializer):
        """Code source maps handled through the RODataDeserializer base."""

        def getObjectAt(self, stream):
            # Placeholder label only; nothing is read from the argument.
            label = 'code source map'
            return label

    # Class ID: 23
    class CompressedStackMapsDeserializer(RODataDeserializer):
        """Compressed stack maps handled through the RODataDeserializer base."""

        def getObjectAt(self, stream):
            # Placeholder label only; nothing is read from the argument.
            label = 'compressed stack maps'
            return label
else:
    # Class ID: 21
    class PcDescriptorsDeserializer():
        """PC descriptors read inline from the snapshot stream."""

        def readAlloc(self, snapshot):
            """Reserve one placeholder reference per descriptor and record the index range."""
            self.startIndex = snapshot.nextRefIndex
            remaining = StreamUtils.readUnsigned(snapshot.stream)
            while remaining > 0:
                # The per-object length is consumed only to advance the stream.
                StreamUtils.readUnsigned(snapshot.stream)
                snapshot.assignRef('pc descriptors')
                remaining -= 1
            self.stopIndex = snapshot.nextRefIndex

        def readFill(self, snapshot):
            """Replace every placeholder with a dict holding the length and raw bytes."""
            for refId in range(self.startIndex, self.stopIndex):
                size = StreamUtils.readUnsigned(snapshot.stream)
                snapshot.references[refId] = {
                    'length': size,
                    'data': snapshot.stream.read(size),
                }

# Class ID: 25
class ExceptionHandlersDeserializer():
Expand Down Expand Up @@ -872,36 +891,80 @@ def readFill(self, snapshot):

snapshot.references[refId] = arrayPtr

# Class ID: 81
class OneByteStringDeserializer():
    """Deserializer for one-byte strings stored inline in the snapshot stream."""

    def readAlloc(self, snapshot):
        """Reserve one placeholder reference per string and record the index range."""
        self.startIndex = snapshot.nextRefIndex
        remaining = StreamUtils.readUnsigned(snapshot.stream)
        while remaining > 0:
            # The per-string length is consumed only to advance the stream.
            StreamUtils.readUnsigned(snapshot.stream)
            snapshot.assignRef('one byte string')
            remaining -= 1
        self.stopIndex = snapshot.nextRefIndex

    def readFill(self, snapshot):
        """Replace every placeholder with a dict of hash, length and decoded text."""
        for refId in range(self.startIndex, self.stopIndex):
            size = StreamUtils.readUnsigned(snapshot.stream)
            StreamUtils.readBool(snapshot.stream) # Canonicalization plays no role in parsing
            # Dict values are evaluated left to right, so the hash is read
            # from the stream before the character data.
            snapshot.references[refId] = {
                'hash': StreamUtils.readInt(snapshot.stream, 32),
                'length': size,
                'data': ''.join(map(chr, snapshot.stream.read(size))),
            }

# Class ID: 82
class TwoByteStringDeserializer():
    """Deserializer for two-byte strings; only the alloc pass is defined here."""

    def readAlloc(self, snapshot):
        """Reserve one placeholder reference per string and record the index range."""
        self.startIndex = snapshot.nextRefIndex
        remaining = StreamUtils.readUnsigned(snapshot.stream)
        while remaining > 0:
            # The per-string length is consumed only to advance the stream.
            StreamUtils.readUnsigned(snapshot.stream)
            snapshot.assignRef('two-byte string')
            remaining -= 1
        self.stopIndex = snapshot.nextRefIndex
if includesCode:
    # Class ID: 81
    class OneByteStringDeserializer(RODataDeserializer):
        """One-byte strings decoded directly from the read-only data image."""

        def getObjectAt(self, snapshot):
            """Decode the string at the current position of snapshot.rodata.

            The 16-byte header is unpacked as two little-endian 32-bit unsigned
            values followed by one 64-bit unsigned value; only the last one is
            used. It is halved to obtain the byte count (presumably because the
            stored length is a tagged integer; TODO confirm).
            """
            rodata = snapshot.rodata
            header = rodata.read(16)
            _tags, _hash, taggedLength = unpack('<LLQ', header)
            payload = rodata.read(taggedLength // 2)
            return "".join(map(chr, payload))

    # Class ID: 82
    class TwoByteStringDeserializer(RODataDeserializer):
        """Two-byte strings; currently represented by a placeholder label."""

        def getObjectAt(self, stream):
            # Placeholder label only; nothing is read from the argument.
            label = 'two-byte string'
            return label

else:
    # Class ID: 81
    class OneByteStringDeserializer():
        """One-byte strings read inline from the snapshot stream."""

        def readAlloc(self, snapshot):
            """Reserve one placeholder reference per string and record the index range."""
            self.startIndex = snapshot.nextRefIndex
            remaining = StreamUtils.readUnsigned(snapshot.stream)
            while remaining > 0:
                # The per-string length is consumed only to advance the stream.
                StreamUtils.readUnsigned(snapshot.stream)
                snapshot.assignRef('one byte string')
                remaining -= 1
            self.stopIndex = snapshot.nextRefIndex

        def readFill(self, snapshot):
            """Replace every placeholder with a dict of hash, length and decoded text."""
            for refId in range(self.startIndex, self.stopIndex):
                size = StreamUtils.readUnsigned(snapshot.stream)
                StreamUtils.readBool(snapshot.stream) # Canonicalization plays no role in parsing
                # Dict values are evaluated left to right, so the hash is read
                # from the stream before the character data.
                snapshot.references[refId] = {
                    'hash': StreamUtils.readInt(snapshot.stream, 32),
                    'length': size,
                    'data': ''.join(map(chr, snapshot.stream.read(size))),
                }

    # Class ID: 82
    class TwoByteStringDeserializer():
        """Two-byte strings; only the alloc pass is defined here."""

        def readAlloc(self, snapshot):
            """Reserve one placeholder reference per string and record the index range."""
            self.startIndex = snapshot.nextRefIndex
            remaining = StreamUtils.readUnsigned(snapshot.stream)
            while remaining > 0:
                # The per-string length is consumed only to advance the stream.
                StreamUtils.readUnsigned(snapshot.stream)
                snapshot.assignRef('two-byte string')
                remaining -= 1
            self.stopIndex = snapshot.nextRefIndex
# Class ID: 81
class OneByteStringDeserializer():
    """Deserializer for one-byte strings stored inline in the snapshot stream."""

    def readAlloc(self, snapshot):
        """Reserve one placeholder reference per string and record the index range."""
        self.startIndex = snapshot.nextRefIndex
        pending = StreamUtils.readUnsigned(snapshot.stream)
        while pending > 0:
            # The per-string length is consumed only to advance the stream.
            StreamUtils.readUnsigned(snapshot.stream)
            snapshot.assignRef('one byte string')
            pending -= 1
        self.stopIndex = snapshot.nextRefIndex

    def readFill(self, snapshot):
        """Replace every placeholder with a dict of hash, length and decoded text."""
        for refId in range(self.startIndex, self.stopIndex):
            charCount = StreamUtils.readUnsigned(snapshot.stream)
            StreamUtils.readBool(snapshot.stream) # Canonicalization plays no role in parsing
            # Dict values are evaluated left to right, so the hash is read
            # from the stream before the character data.
            snapshot.references[refId] = {
                'hash': StreamUtils.readInt(snapshot.stream, 32),
                'length': charCount,
                'data': ''.join(map(chr, snapshot.stream.read(charCount))),
            }

# Class ID: 82
class TwoByteStringDeserializer():
    """Deserializer for two-byte strings; only the alloc pass is defined here."""

    def readAlloc(self, snapshot):
        """Reserve one placeholder reference per string and record the index range."""
        self.startIndex = snapshot.nextRefIndex
        pending = StreamUtils.readUnsigned(snapshot.stream)
        while pending > 0:
            # The per-string length is consumed only to advance the stream.
            StreamUtils.readUnsigned(snapshot.stream)
            snapshot.assignRef('two-byte string')
            pending -= 1
        self.stopIndex = snapshot.nextRefIndex

# Aggregate deserializer for class IDs: 108, 111, 114, 117, 120, 123, 126, 129, 132, 135, 138, 141, 144, 147
class TypedDataDeserializer():
Expand Down Expand Up @@ -939,12 +1002,6 @@ def readFill(self, snapshot):
if ClassId.isTypedDataClass(cid):
return TypedDataDeserializer(cid)

if includesCode:
if ClassId(cid) is ClassId.PC_DESCRIPTORS or ClassId(cid) is ClassId.CODE_SOURCE_MAP or \
ClassId(cid) is ClassId.COMPRESSED_STACK_MAPS or ClassId(cid) is ClassId.ONE_BYTE_STRING or \
ClassId(cid) is ClassId.TWO_BYTE_STRING:
return RODataDeserializer()

if ClassId(cid) is ClassId.ILLEGAL:
raise Exception('Encountered illegal cluster')
if ClassId(cid) is ClassId.CLASS:
Expand All @@ -970,9 +1027,9 @@ def readFill(self, snapshot):
if ClassId(cid) is ClassId.PC_DESCRIPTORS:
return PcDescriptorsDeserializer()
if ClassId(cid) is ClassId.CODE_SOURCE_MAP:
return RODataDeserializer()
return CodeSourceMapDeserializer()
if ClassId(cid) is ClassId.COMPRESSED_STACK_MAPS:
return RODataDeserializer()
return CompressedStackMapsDeserializer()
if ClassId(cid) is ClassId.EXCEPTION_HANDLERS:
return ExceptionHandlersDeserializer()
if ClassId(cid) is ClassId.UNLINKED_CALL:
Expand Down
9 changes: 8 additions & 1 deletion Constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,11 @@
kNativeFunction = 2

kNullabilityBitSize = 2
kNullabilityBitMask = 3
kNullabilityBitMask = 3

# Names of the four snapshot blobs: VM and isolate, data and instructions.
# NOTE(review): presumably looked up as symbols in the AOT app binary; confirm against the caller.
kAppAOTSymbols = [
    '_kDartVmSnapshotData',
    '_kDartVmSnapshotInstructions',
    '_kDartIsolateSnapshotData',
    '_kDartIsolateSnapshotInstructions',
]
48 changes: 38 additions & 10 deletions Snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@ class Snapshot:
# hash = version hash (32 byte string)
# features = string array of features

def __init__(self, snapshot):
def __init__(self, data, dataOffset, instructions, instructionsOffset, base=None):
self.stream = BytesIO(data)

# Header
self.stream = BytesIO(snapshot)
self.magic = int.from_bytes(self.stream.read(Constants.kMagicSize), 'little')
self.size = int.from_bytes(self.stream.read(Constants.kLengthSize), 'little')
self.kind = Kind(int.from_bytes(self.stream.read(Constants.kKindSize), 'little'))
self.rodataOffset = NumericUtils.roundUp(self.size + Constants.kMagicSize, Constants.kMaxObjectAlignment)
self.rodata = BytesIO(self.stream.getbuffer()[self.rodataOffset:])
self.hash = self.stream.read(Constants.hashSize).decode('UTF-8')
self.features = list(map(lambda x: x.decode('UTF-8'), StreamUtils.readString(self.stream).split(b'\x20')))

Expand Down Expand Up @@ -53,17 +56,23 @@ def __init__(self, snapshot):
# Initialize references
self.references = ['INVALID'] # Reference count starts at 1
self.nextRefIndex = 1
self.addBaseObjects()
for _ in range(len(self.references), self.numBaseObjects):
self.assignRef('UNKNOWN') # Allocate missing references

# Initialize classes
self.classes = [ ]

if base is not None:
self.references = base.references
self.nextRefIndex = base.nextRefIndex
else:
self.addBaseObjects()

self.unboxedFieldsMapAt = { }

assert(len(self.references) == self.numBaseObjects)
assert(len(self.references) - 1 == self.numBaseObjects) # Reference count starts at 1

self.clusters = [ self.readClusterAlloc() for _ in range(self.numClusters) ]

assert(len(self.references) == self.numObjects)
assert(len(self.references) - 1 == self.numObjects) # Reference count starts at 1

for cluster in self.clusters:
cluster.readFill(self)
Expand Down Expand Up @@ -111,8 +120,6 @@ def addBaseObjects(self):

def readRoots(self):
    """Read the root reference from the stream and store it as self.symbolTable.

    The value is whatever StreamUtils.readRef returns; it is not resolved
    against self.references here.
    """
    # Debug prints were removed: one indexed self.references at a hard-coded
    # position and raised IndexError on snapshots with fewer references.
    self.symbolTable = StreamUtils.readRef(self.stream)

def assignRef(self, obj):
self.references.append(obj)
Expand Down Expand Up @@ -181,4 +188,25 @@ def getSummary(self):
prettyString += 'Clusters count: ' + str(self.getNumClusters()) + '\n'
prettyString += 'Field table length: ' + str(self.getFieldTableLength()) + '\n'
prettyString += 'Data image offset: ' + str(self.getDataImageOffset())
return prettyString
return prettyString

# WIP
def getClasses(self):
clazz = list(filter(lambda x: self.references[x['name']] == 'MyApp', self.classes))[0]
clazz['name'] = self.references[clazz['name']]
clazz['functions'] = list(map(lambda f: self._getFunction(f), self.references[clazz['functions']]['data']))
clazz['interfaces'] = list(map(lambda f: self._getInterface(i), self.references[clazz['interfaces']]['data']))

def _getFunction(self, f):
function = { }
function['name'] = self.references[self.references[f]['name']]
function['resultType'] = self.references[f]['resultType']

return function

def _getInterface(self, i):
interface = { }
interface['data']


return interface
Loading

0 comments on commit 783d1a6

Please sign in to comment.