mirror of
https://github.com/Ragora/T2-EngineScraper.git
synced 2026-01-19 19:14:45 +00:00
482 lines
18 KiB
Python
482 lines
18 KiB
Python
"""
|
|
scraper.py
|
|
|
|
The scraper class is used to scrape data from a decompiled form of the
|
|
Tribes 2 game executable in order to build a tree of sorts that can be
|
|
used for mapping out the various functions, global variables and
|
|
datablock types & their associated properties.
|
|
|
|
This software is licensed under the MIT license. Refer to LICENSE.txt for
|
|
details.
|
|
Copyright (c) 2016 Robert MacGregor
|
|
"""
|
|
|
|
import re
|
|
import string
|
|
|
|
class EngineComponent(object):
|
|
"""
|
|
The base representation type for all the data the scraper will be
|
|
pulling from the pseudo source code.
|
|
"""
|
|
name = None
|
|
address = None
|
|
type_name = None
|
|
description = None
|
|
|
|
def __init__(self, name, address, type_name, description):
|
|
self.name = name
|
|
self.address = address
|
|
self.type_name = type_name
|
|
self.description = description
|
|
|
|
class Function(EngineComponent):
|
|
"""
|
|
The virtual representation of a callable engine function from Torque
|
|
Script. It contains a description, the address, argument information
|
|
and if applicable, the object typename it is bound to.
|
|
"""
|
|
min_args = None
|
|
max_args = None
|
|
|
|
def __init__(self, name, address, type_name, description, min_args, max_args):
|
|
EngineComponent.__init__(self, name, address, type_name, description)
|
|
|
|
self.min_args = min_args
|
|
self.max_args = max_args
|
|
|
|
class GlobalVariable(EngineComponent):
|
|
def __init__(self, name, address, type_name):
|
|
EngineComponent.__init__(self, name, address, type_name, None)
|
|
|
|
class Datablock(EngineComponent):
|
|
"""
|
|
The virtual representation of the Torque Game Engine datablock used
|
|
for synchronization of custom simulation parameters across the network.
|
|
"""
|
|
properties = None
|
|
|
|
class Property(EngineComponent):
|
|
def __init__(self, name, address, type_name):
|
|
EngineComponent.__init__(self, name, address, type_name, None)
|
|
|
|
def __init__(self, name):
|
|
EngineComponent.__init__(self, name, None, None, None)
|
|
|
|
self.properties = { }
|
|
|
|
class Scraper(object):
|
|
"""
|
|
The meat and potatoes of the scraper system. This is your primary
|
|
class to instantiate
|
|
"""
|
|
|
|
_global_function_registry = ["426650", "426590", "4265D0", "426550", "426610"]
|
|
"""
|
|
The global function registry is used by the scraper to determine prefixes
|
|
of all the sub routines that register global functions in the Tribes 2
|
|
engine. They take the following format: sub_######
|
|
"""
|
|
|
|
_type_function_registry = ["426450", "426510", "426450", "425960"]
|
|
"""
|
|
The type function registry is the same as the global function registry,
|
|
except used for type contextual functions. The registration signatures
|
|
for these functions are slightly different.
|
|
"""
|
|
|
|
_datablock_property_registry = ["423F20"]
|
|
"""
|
|
Registration subroutines regarding static fields of datablocks.
|
|
"""
|
|
|
|
_global_value_registry = ["4263B0"]
|
|
"""
|
|
Registration subroutines regarding globally addressible variables.
|
|
"""
|
|
|
|
_registration_expression_template = "sub_(%s)[^;{]+;[^\"]"
|
|
"""
|
|
Base regular expression used for matching the various registration
|
|
calls. It is formatted with the above values in-place for operating
|
|
within the different contexts.
|
|
"""
|
|
|
|
_datablock_type_table = {
|
|
"61E7A0": "ExplosionData",
|
|
"5B4F60": "WaterBlockData",
|
|
"612400": "WheeledVehicleData",
|
|
"6161E0": "HoverVehicleData",
|
|
"5CE810": "PlayerData",
|
|
"6034C0": "ItemData",
|
|
"69C170": "TriggerData",
|
|
"50DC70": "AudioProfileData",
|
|
"62B3C0": "LinearProjectile",
|
|
"60F820": "FlyingVehicleData",
|
|
"6370D0": "SeekingProjectileData",
|
|
"69B0F0": "PrecipitationData",
|
|
"641480": "SniperProjectileData",
|
|
"66A270": "SensorData",
|
|
|
|
"6303F0": "GrenadeProjectileData",
|
|
"6333D0": "GrenadeProjectileData",
|
|
|
|
"694B40": "TracerProjectileData",
|
|
"6470D0": "TargetProjectileData",
|
|
|
|
"653E10": "TurretData",
|
|
"654AE0": "TurretData",
|
|
"5E4C20": "TurretData", # Camera?
|
|
|
|
"654330": "TurretImageData", # TurretData?
|
|
|
|
"64E2B0": "LightningData",
|
|
"627150": "LightningData",
|
|
|
|
"621DF0": "ParticleEmitterData",
|
|
"622E60": "ParticleData",
|
|
|
|
"644910": "ELFProjectileData",
|
|
"64A860": "ELFProjectileData",
|
|
|
|
"5F4D90": "ShapeBaseImageData",
|
|
|
|
"602940": "StaticShapeData",
|
|
"66B000": "SpawnSphere",
|
|
|
|
"6099E0": "VehicleData",
|
|
|
|
"47D880": "AI Task?",
|
|
|
|
"63D870": "LinearFlareData",
|
|
|
|
"59A870": "TerrainData",
|
|
"68C4B0": "ShockwaveData",
|
|
|
|
"4B5840": "CorpseData",
|
|
|
|
"619B30": "Sky",
|
|
"5AB310": "Sky",
|
|
|
|
"68AAA0": "PhysicalZone",
|
|
|
|
"626240": "Debris",
|
|
"684000": "Debris",
|
|
|
|
"6751A0": "ForceFieldBareData",
|
|
|
|
"631A50": "ProjectileData", # Base?
|
|
"69AF10": "FireballAtmosphere",
|
|
}
|
|
"""
|
|
The datablock type table is used when looking up the context of a given static datablock field registration
|
|
call. This context is merely the address of the calling subroutine, so multiple entries may have to be added
|
|
for a single datablock type.
|
|
"""
|
|
|
|
# Hacks
|
|
string_expression = re.compile("\" *\S+\" *")
|
|
|
|
# Global method material
|
|
global_method_add_expression = re.compile(_registration_expression_template % string.join(_global_function_registry, "|"), re.IGNORECASE)
|
|
type_method_add_expression = re.compile(_registration_expression_template % string.join(_type_function_registry, "|"), re.IGNORECASE)
|
|
datablock_property_add_expression = re.compile(_registration_expression_template % string.join(_datablock_property_registry, "|"), re.IGNORECASE)
|
|
global_value_add_expression = re.compile(_registration_expression_template % string.join(_global_value_registry, "|"), re.IGNORECASE)
|
|
|
|
type_function_total = 0
|
|
global_function_count = 0
|
|
type_function_counts = None
|
|
|
|
primitive_type_mapping = [
|
|
"Unknown",
|
|
"Integer",
|
|
"Unknown",
|
|
"Boolean",
|
|
"Unknown",
|
|
"Float",
|
|
"Unknown"
|
|
]
|
|
|
|
# Dictionary containing type name to inheritance list mappings
|
|
type_name_inheritance = {
|
|
"HTTPObject": "TCPObject",
|
|
"TCPObject": "SimObject",
|
|
"FileObject": "SimObject",
|
|
"Item": "ShapeBase",
|
|
"ShapeBase": "GameBase",
|
|
"GameBase": "SceneObject",
|
|
"SceneObject": "NetObject",
|
|
"NetObject": "SimObject",
|
|
"Player": "ShapeBase",
|
|
"DebugView": "GuiTextCtrl",
|
|
"GuiTextCtrl": "GuiControl",
|
|
"GuiControl": "SimGroup",
|
|
"SimGroup": "SimSet",
|
|
"SimSet": "SimObject",
|
|
"Canvas": "GuiCanvas",
|
|
"GuiCanvas": "GuiControl",
|
|
"SimpleNetObject": "SimObject",
|
|
"AIObjectiveQ": "SimSet",
|
|
"ForceFieldBare": "GameBase",
|
|
"AIConnection": "GameConnection",
|
|
"GameConnection": "NetConnection",
|
|
"NetConnection": "SimGroup",
|
|
"Turret": "StaticShape",
|
|
"StaticShape": "ShapeBase",
|
|
"TerrainBlock": "SceneObject",
|
|
"InteriorInstance": "SceneObject",
|
|
"StaticShape": "ShapeBase",
|
|
"Trigger": "GameBase",
|
|
"WaterBlock": "SceneObject",
|
|
"FireballAtmosphere": "GameBase",
|
|
"MissionArea": "NetObject",
|
|
"TSStatic": "SceneObject",
|
|
# Projectile Types
|
|
"LinearProjectile": "Projectile",
|
|
"Projectile": "GameBase",
|
|
"EnergyProjectile": "GrenadeProjectile",
|
|
"GrenadeProjectile": "Projectile",
|
|
"TargetProjectile": "Projectile",
|
|
# Vehicle Types
|
|
"HoverVehicle": "Vehicle",
|
|
"Vehicle": "ShapeBase",
|
|
"FlyingVehicle": "Vehicle",
|
|
"WheeledVehicle": "Vehicle",
|
|
# Datablock Types
|
|
"HoverVehicleData": "VehicleData",
|
|
"VehicleData": "ShapeBaseData",
|
|
"FlyingVehicleData": "VehicleData",
|
|
"WheeledVehicleData": "VehicleData",
|
|
"ForceFieldBareData": "GameBaseData",
|
|
"LinearProjectileData": "ProjectileData",
|
|
"EnergyProjectileData": "GrenadeProjectileData",
|
|
"GrenadeProjectileData": "ProjectileData",
|
|
"FireballAtmosphereData": "GameBaseData",
|
|
"TargetProjectileData": "ProjectileData",
|
|
"PlayerData": "ShapeBaseData",
|
|
"ShapeBaseData": "GameBaseData",
|
|
"GameBaseData": "SimDataBlock",
|
|
"SimDataBlock": "SimObject",
|
|
}
|
|
|
|
# Outputs
|
|
global_functions = None
|
|
type_methods = None
|
|
global_values = None
|
|
|
|
datablocks = None
|
|
|
|
def __init__(self, filename):
|
|
file_buffer = ""
|
|
with open(filename, "r") as handle:
|
|
file_buffer = handle.read()
|
|
|
|
# First, we skip the first 33350 or so because there's lots of declarations
|
|
# that the simplified regex will get tripped up on.
|
|
chopped_lines = file_buffer.split("\r\n")
|
|
chopped_lines = chopped_lines[33350:len(chopped_lines)]
|
|
|
|
file_buffer = string.join(chopped_lines)
|
|
|
|
"""
|
|
Now we perform a bit of a hack here because of unnecessary immutable
|
|
memory bullshit: Strings in Python are immutable and due to the way
|
|
the Regex works (can probably be fixed properly at some point),
|
|
methods that have a semicolon in their description (most do) will cause
|
|
the regex to match up until that semicolon, not the one that actually
|
|
delineates the entire method. So as a quick hack, we create a mutable
|
|
memory buffer (just a list) to do single character replacements of ;
|
|
with ~ within the context of strings. We can't simply use replace or any
|
|
of the regular string modification methods because they create copies of
|
|
the string memory which bogs down the system massively at this point: times
|
|
went down from an absolute unknown to merely ~2sec to run the entirety of this
|
|
software using this work around.
|
|
"""
|
|
mutable_buffer = list(file_buffer)
|
|
|
|
string_search = re.finditer(self.string_expression, file_buffer)
|
|
for string_occurrence in string_search:
|
|
string_text = string_occurrence.group(0)
|
|
|
|
for semi_occurrence in range(string_text.count(";")):
|
|
semi_location = string_text.find(";", semi_occurrence)
|
|
mutable_buffer[string_occurrence.start() + semi_location] = "~"
|
|
|
|
# Implode the list together using "" as a delineator, so it just reassembles the payload
|
|
file_buffer = string.join(mutable_buffer, "")
|
|
|
|
# A list of tuples with the following structure: (addr, name, desc, minArgs, maxArgs)
|
|
self.global_functions = [ ]
|
|
|
|
global_method_add_search = re.finditer(self.global_method_add_expression, file_buffer)
|
|
for global_function in global_method_add_search:
|
|
global_function_source = global_function.group(0)
|
|
|
|
opening_index = global_function_source.find("(")
|
|
closing_index = global_function_source.rfind(")", global_function_source.count(")") - 1)
|
|
|
|
global_function_source = global_function_source[opening_index + 1:closing_index]
|
|
|
|
# Extract the description first; this is a huge hack due to the commas in the desc
|
|
global_function_source, global_method_description = self._extract_description(global_function_source)
|
|
global_method_arguments = global_function_source.split(",")
|
|
|
|
# Strip out the global method info
|
|
global_method_name = self._extract_name(global_method_arguments, 0)
|
|
|
|
try:
|
|
global_method_address = self._extract_address(global_method_arguments, 1)
|
|
global_method_minargs = int(global_method_arguments[3])
|
|
global_method_maxargs = int(global_method_arguments[4])
|
|
|
|
self.global_function_count = self.global_function_count + 1
|
|
|
|
global_function = Function(global_method_name, global_method_address, None, global_method_description, global_method_minargs, global_method_maxargs)
|
|
self.global_functions.append(global_function)
|
|
except ValueError:
|
|
pass
|
|
|
|
# A dictionary of classname to tuples with the following structure: (typename, addr, name, desc, minArgs, maxArgs)
|
|
self.type_methods = { }
|
|
self.type_function_counts = { }
|
|
|
|
type_method_add_search = re.finditer(self.type_method_add_expression, file_buffer)
|
|
for type_method in type_method_add_search:
|
|
type_method_source = type_method.group(0)
|
|
|
|
opening_index = type_method_source.find("(")
|
|
closing_index = type_method_source.rfind(")")
|
|
|
|
type_method_source = type_method_source[opening_index + 1:closing_index]
|
|
|
|
# Extract the description first; this is a huge hack due to the commas in the desc
|
|
type_method_source, type_method_description = self._extract_description(type_method_source)
|
|
type_method_arguments = type_method_source.split(",")
|
|
|
|
# Strip out the type method info
|
|
type_method_type = self._extract_name(type_method_arguments, 1)
|
|
type_method_name = self._extract_name(type_method_arguments, 2)
|
|
type_method_address = self._extract_address(type_method_arguments, 3)
|
|
|
|
try:
|
|
type_method_minargs = int(type_method_arguments[5])
|
|
type_method_maxargs = int(type_method_arguments[6])
|
|
|
|
self.type_methods.setdefault(type_method_type, [])
|
|
self.type_function_counts.setdefault(type_method_type, 0)
|
|
|
|
self.type_function_total = self.type_function_total + 1
|
|
self.type_function_counts[type_method_type] = self.type_function_counts[type_method_type] + 1
|
|
|
|
self.type_methods[type_method_type] .append((type_method_type, type_method_address, type_method_name, type_method_description, type_method_minargs, type_method_maxargs))
|
|
except ValueError:
|
|
continue
|
|
|
|
self.global_values = [ ]
|
|
|
|
global_value_add_search = re.finditer(self.global_value_add_expression, file_buffer)
|
|
for global_value in global_value_add_search:
|
|
global_value_source = global_value.group(0)
|
|
|
|
opening_index = global_value_source.find("(")
|
|
closing_index = global_value_source.rfind(")")
|
|
|
|
global_value_source = global_value_source[opening_index + 1:closing_index]
|
|
global_value_arguments = global_value_source.split(",")
|
|
|
|
# Strip out the global value info
|
|
global_value_name = self._extract_name(global_value_arguments, 0)
|
|
global_value_address = self._extract_address(global_value_arguments, 2)
|
|
|
|
global_value_type = int(global_value_arguments[1])
|
|
self.global_values.append(GlobalVariable(global_value_address, global_value_type, 0))
|
|
|
|
# Extract the datablock properties now
|
|
self.datablocks = { }
|
|
|
|
datablock_property_add_search = re.finditer(self.datablock_property_add_expression, file_buffer)
|
|
for datablock_property in datablock_property_add_search:
|
|
datablock_property_source = datablock_property.group(0)
|
|
|
|
"""
|
|
Here we don't have to worry about anything with their own scopes
|
|
sitting above our declarations in the input file this was built for,
|
|
so we just search backwards for the type declaration (Which is always an int)
|
|
and use that to copy out the declaration source.
|
|
"""
|
|
declaration_start = file_buffer.rfind("//----- ", 0, datablock_property.start())
|
|
declaration_end = file_buffer.rfind("-", declaration_start, datablock_property.start())
|
|
declaration_source = file_buffer[declaration_start:declaration_end]
|
|
|
|
calling_method = self._extract_caller(declaration_source)
|
|
|
|
# If we don't know what it is, just add a default value and resolve it this way
|
|
self._datablock_type_table.setdefault(calling_method, calling_method)
|
|
datablock_type = self._datablock_type_table[calling_method]
|
|
self.datablocks.setdefault(datablock_type, Datablock(datablock_type))
|
|
|
|
# Pull the datablock property information now
|
|
datablock_arguments = datablock_property_source.split(",")
|
|
datablock_property_name = self._extract_name(datablock_arguments, 0)
|
|
datablock_property_address = self._extract_address(datablock_arguments, 2)
|
|
|
|
# Write it out and we should be fine.
|
|
current_datablock = self.datablocks[datablock_type]
|
|
current_datablock.properties[datablock_property_name] = Datablock.Property(datablock_property_name, datablock_property_address, "Bla")
|
|
|
|
def build_inheritance_tree(self, typename):
|
|
result = [ typename ]
|
|
|
|
while (typename in self.type_name_inheritance.keys()):
|
|
typename = self.type_name_inheritance[typename]
|
|
result.append(typename)
|
|
|
|
return result
|
|
|
|
# Helper Functions
|
|
def _extract_description(self, source):
|
|
desc_end = source.rfind("\"")
|
|
|
|
# We found the end, now we need to look for the previous parameter delineator
|
|
desc_begin = -1
|
|
ignore_delineator = True # Used for if we're in a quotation
|
|
for index in reversed(range(desc_end)):
|
|
current_character = source[index]
|
|
|
|
if (current_character == "," and not ignore_delineator):
|
|
desc_begin = index + 1
|
|
break
|
|
elif (current_character == "\""):
|
|
ignore_delineator = not ignore_delineator
|
|
|
|
desc = source[desc_begin + 1:desc_end]
|
|
desc = desc.lstrip()
|
|
desc = desc.replace("(int)\"", "")
|
|
|
|
source = source[0:desc_begin] + source[desc_end:len(source)]
|
|
desc = desc.replace("~", ";")
|
|
|
|
return source, desc
|
|
|
|
def _extract_name(self, source, index):
|
|
name = source[index].lstrip()
|
|
name = name[name.find("\"") + 1:len(name)].rstrip("\" ")
|
|
|
|
# Hack fix for the way the engine registers functions for the Sky type
|
|
name = name.replace("(int)&off_7957AC", "Sky")
|
|
|
|
return name
|
|
|
|
def _extract_address(self, source, index):
|
|
address = source[index]
|
|
address = address[address.find("_") + 1:len(address)].rstrip("\" ")
|
|
|
|
return address.lstrip()
|
|
|
|
def _extract_caller(self, source):
|
|
start = source.find("(")
|
|
end = source.find(")", start)
|
|
|
|
result = int(source[start + 1:end],16)
|
|
return hex(result)[2:].upper()
|