Make the exporters actually work, and begin stripping out the multi-directory operations that were never really necessary.

This commit is contained in:
Robert MacGregor 2016-04-21 18:32:47 -04:00
parent 574385d707
commit 8f969d7208
5 changed files with 245 additions and 219 deletions

View file

@ -2,20 +2,20 @@
<HEAD>
<TITLE>TS Scraper | Index</TITLE>
</HEAD>
<BODY>
<CENTER>
<FONT SIZE="12">
TS Scraper
</FONT>
<HR><br>
<FONT SIZE="6"><U>File Index</U></FONT><br><br>
{% for file in files %}
<a href="{{ file.path }}">{{ file.path }}</a><br>
{% endfor %}
<br>
This file was auto-generated by TS Scraper.<br>
TS Scraper is written in Python 2 by Robert MacGregor.

View file

@ -6,43 +6,44 @@ import os.path
class Exporter(object):
data = None
def __init__(self, data):
self.data = data
def write(self, directory):
def write(self):
with open("Out.txt", "w") as handle:
# Write the header
handle.write("====== Test ======\n\n")
# For each file entry...
for file in self.data:
for file in self.data["files"]:
if (len(file.global_functions) == 0 and len(file.bound_functions.keys()) == 0 and len(file.datablocks) == 0):
continue
# Calculate the total entry count
entry_count = len(file.global_functions) + len(file.datablocks)
for type in file.bound_functions.keys():
entry_count = entry_count + len(file.bound_functions[type])
handle.write("===== Entries in %s (%u total) =====\n\n" % (file.path, entry_count))
handle.write("===== Global Functions (%u total) =====\n\n" % len(file.global_functions))
# For each global function...
for function in file.global_functions:
handle.write("==== %s ====\n" % function.name)
handle.write("File (line %u): %s\n\n" % (function.line, file.path))
if (len(function.parameters) != 0):
handle.write("Parameters (in order):\n")
for parameter in function.parameters:
handle.write(" * %s\n" % parameter)
else:
handle.write("Parameters: None\n")
handle.write("\n")
# For each known type...
for type in file.bound_functions.keys():
handle.write("===== Bound Functions on %s (%u total) =====\n\n" % (type, len(file.bound_functions[type])))
@ -50,15 +51,15 @@ class Exporter(object):
for function in file.bound_functions[type]:
handle.write("==== %s::%s ====\n" % (function.type, function.name))
handle.write("File (line %u): %s\n\n" % (function.line, file.path))
if (len(function.parameters) != 0):
handle.write("Parameters (in order):\n")
for parameter in function.parameters:
handle.write(" * %s\n" % parameter)
else:
handle.write("Parameters: None\n")
handle.write("\n")
print("Done processing.")
print("Done processing.")

View file

@ -6,41 +6,67 @@ import os.path
class Exporter(object):
data = None
def __init__(self, data):
def __init__(self, data, target_directory):
self.data = data
def write(self):
self.target_directory = target_directory
def _path_visitor(self, arg, dirname, names):
for name in names:
mirrored_path = os.path.join(dirname, name)
relative_path = os.path.join(arg, mirrored_path.replace(self.target_directory + "/", ""))
try:
if (os.path.isdir(mirrored_path)):
print(relative_path)
os.mkdir(relative_path)
except OSError:
pass
def write(self, directory):
import jinja2
# Read the template files first
file_template = None
with open("data/filetempl.html", "r") as handle:
file_template = handle.read()
index_template = None
with open("data/indextempl.html", "r") as handle:
index_template = handle.read()
html_filenames = [ ]
# Recurse the target directory and recreate its structure
os.path.walk(self.target_directory, self._path_visitor, directory)
# For each file entry...
for file in self.data:
script_relative_paths = [ ]
for file in self.data["files"]:
if (len(file.global_functions) == 0 and len(file.bound_functions.keys()) == 0 and len(file.datablocks) == 0):
continue
html_filename = file.path.lstrip("./").replace("/", "-")
# First, we collapse to a file path relative to our output dir
# FIXME: Dirty hack to make sure the os.path.join works
html_filename = file.path.replace(self.target_directory + "/", "")
script_relative = html_filename
script_relative_paths.append(script_relative)
# Next, we ensure that the subdirectories exist
#html_filename = html_filename.lstrip("./").replace("/", "-")
html_filename, oldextension = os.path.splitext(html_filename)
html_filename = "%s.html" % html_filename
html_filenames.append(html_filename)
with open(html_filename, "w") as handle:
with open(os.path.join(directory, html_filename), "w") as handle:
template = jinja2.Template(file_template)
handle.write(template.render(file=file.path, globals=file.global_functions))
# Dump the index file
with open("index.html", "w") as handle:
with open(os.path.join(directory, "index.html"), "w") as handle:
template = jinja2.Template(index_template)
handle.write(template.render(files=self.data))
handle.write(template.render(files=script_relative_paths))
print("Done processing.")

52
main.py
View file

@ -18,32 +18,32 @@ class Application(object):
thread_count = 8
threads = None
target_directory = None
target_exporter = None
def print_usage(self):
print("Usage: '%s <exporter> <target directories...>'" % sys.argv[0])
print("Usage: '%s <exporter> <output directory> <target directories...>'" % sys.argv[0])
print("Or: '%s exporters' for a list of known exporters." % sys.argv[0])
def get_available_exporters(self):
exporters = { }
for root, dirs, files in os.walk("exporters"):
for filename in files:
module_name, extension = os.path.splitext(filename)
if (module_name == "__init__"):
continue
try:
module = importlib.import_module('exporters.%s' % (module_name))
exporters[module_name] = module
except ImportError as e:
print(e)
return exporters
def main(self):
"""
The main entry point of the application. This is equivalent to
@ -52,22 +52,22 @@ class Application(object):
if (len(sys.argv) < 2):
self.print_usage()
return
exporters = self.get_available_exporters()
if (sys.argv[1] == "exporters"):
print("Available Exporters: ")
for exporter in exporters.keys():
for exporter in exporters:
print("\t- %s" % exporter)
return
print("\t- None")
elif(len(sys.argv) < 3):
return
elif(len(sys.argv) < 4):
self.print_usage()
return
self.target_directory = sys.argv[2:]
self.target_directory = sys.argv[3]
self.output_directory = sys.argv[2]
self.target_exporter = sys.argv[1]
self.run()
@ -85,11 +85,13 @@ class Application(object):
scraper = tsscraper.TSScraper(self.target_directory, self.thread_count)
results = scraper.process()
# Init the DokuOutput
# if (exporter is not None):
# output = exporter.Exporter(results)
# output.write()
# Init the exporter
if (exporter is not None):
# Ensure that the output directory at least exists
os.mkdir(self.output_directory)
output = exporter.Exporter(results, self.target_directory)
output.write(self.output_directory)
if __name__ == "__main__":
print("Operation Completion-----------------------\n%f Seconds" % timeit.timeit("Application().main()", number=1, setup="from __main__ import Application"))
print("Operation Completion-----------------------\n%f Seconds" % timeit.timeit("Application().main()", number=1, setup="from __main__ import Application"))

View file

@ -18,13 +18,13 @@ class FileEntry(object):
global_functions = None
bound_functions = None
datablocks = None
def __init__(self, path):
self.path = path
self.global_functions = [ ]
self.bound_functions = { }
self.datablocks = [ ]
class Function(object):
"""
Class representing a Function entity in the game code tree
@ -35,10 +35,10 @@ class Function(object):
type = None
filepath = None
line = None
aliases = None
comments = None
def __init__(self, name, type, parameters, filepath, line):
self.name = name
self.parameters = parameters
@ -53,13 +53,13 @@ class Global(object):
in the coding.
"""
name = None
def __init__(self, name):
self.name = name
def __repr__(self):
return "$%s" % self.name
class Datablock(object):
"""
Class representing a datablock entry. It contains the type, derived
@ -69,12 +69,12 @@ class Datablock(object):
type = None
derived = None
line = None
aliases = None
properties = None
filepath = None
comments = None
def __init__(self, name, type, properties, filepath, line, derived):
self.name = name
self.type = type
@ -83,7 +83,7 @@ class Datablock(object):
self.aliases = [ ]
self.properties = properties
self.filepath = filepath
def scrape_file(input):
"""
This method is a performance critical code segment in the scraper.
@ -94,85 +94,85 @@ def scrape_file(input):
filepath, parameter_split, combined_pattern = input
key_value_pattern = re.compile("(?<!.)\s*[A-z]+\s*=\s*(\S+);")
global_usages = re.compile("\{.*\$[A-z]+(::([A-z]+))*?.*\}")
global_pattern = re.compile("(?<!.)\s*$[A-z]+(::([A-z]+))*?")
parameter_split = re.compile("\s*,\s*")
assignment_split = re.compile("\s*=\s*")
with open(filepath, "r") as handle:
result = FileEntry(filepath)
file_data = handle.read()
# Parse for all sequences now
for match in re.finditer(combined_pattern, file_data):
line = file_data[0:match.start()].count("\n") + 1
match_text = match.group(0).strip()
if (match_text[0:8] == "function"):
# :: Can't occur correctly in TS in just the function body, so we determine bound functions via the
# presence of ::
if ("::" in match_text):
match_split = match.group(0).strip()[9:].split("::")
type = match_split[0].lower()
match_split = match_split[1].split("(")
name = match_split[0].lower()
match_split = match_split[1].replace(")", "").split(",")
parameters = [ ]
for parameter in match_split:
if (parameter == ""):
continue
parameters.append(parameter.lstrip().rstrip())
result.bound_functions.setdefault(type, [])
result.bound_functions[type].append(Function(name, type, parameters, filepath, line))
else:
match_split = match.group(0).strip()[9:].split("(")
name = match_split[0].lower()
match_split = re.split(parameter_split, match_split[1].replace(")", ""))
parameters = [ ]
for parameter in match_split:
if (parameter == ""):
continue
parameters.append(parameter.strip())
else:
line = file_data[0:match.start()].count("\n") + 1
match_text = match.group(0).lstrip().rstrip()
header = match_text[0:match_text.find("{")]
header = match_text[0:match_text.find("{")]
type = header[10:header.find("(")].strip().lower()
name = header[header.find("(") + 1:header.find(")")].strip().lower()
# Inherited?
inherited = None
inheritor = header.find(":")
if (inheritor != -1):
inherited = header[inheritor + 1:].strip().lower()
# Blow through key, values
properties = { }
for property_match in re.finditer(key_value_pattern, match_text):
property_text = property_match.group(0)
key, value = re.split(assignment_split, property_text, 1)
key = key.lstrip().lower()
value = value.rstrip().rstrip(";")
# Global reference
if (value[0] == "$"):
value = Global(value[1:])
# String
elif (value[0] == "\""):
value = value[1:value.rfind("\"")]
value = value[1:value.rfind("\"")]
# Numerics
else:
try:
@ -180,41 +180,41 @@ def scrape_file(input):
except ValueError as e:
# If this was raised, treat it as a string
pass
properties[key] = value
result.datablocks.append(Datablock(name, type, properties, filepath, line, inherited))
return result
return result
class TSScraper(object):
_process_count = None
_target_directories = None
_dependencies = None
_combined_pattern = re.compile("(?<!.)\s*function\s+(([A-z]|_))+(::([A-z]|_)+)*\(\s*(%[A-z]+(\s*,\s*%[A-z]+)*)*\s*\)|((?<!.)\s*datablock\s+[A-z]+\s*\(\s*\S+\s*\)\s*(:\s*[A-z]+)?\s*(//.*)?\s*\{(\s|\S)*?\s*(?<!.)\};)")
bound_function_pattern = re.compile("(?<!.)\s*function\s+(([A-z]|_)+::)([A-z]|_)+\(\s*(%[A-z]+(\s*,\s*%[A-z]+)*)*\s*\)")
function_pattern = re.compile("(?<!.)\s*function\s+([A-z]|_)+\(\s*(%[A-z]+(\w*,\s*%[A-z]+)*)*\s*\)")
function_pattern = re.compile("(?<!.)\s*function\s+(([A-z]|_)+::)([A-z]|_)+\(\s*(%[A-z]+(\w*,\s*%[A-z]+)*)*\s*\)")
datablock_pattern = re.compile("(?<!.)\s*datablock\s+[A-z]+\s*\(\s*\S+\s*\)\s*(:\s*[A-z]+)?\s*(//.*)?\s*\{(\s|\S)*?\s*(?<!.)\};")
key_value_pattern = re.compile("(?<!.)\s*[A-z]+\s*=\s*(\S+);")
#block_iterator = re.compile("function\s+[A-z]+\s*\(\s*(%[A-z]+(\w*,\s*%[A-z]+)*)*\s*\)\{\S*\}")
# (?<!{)\s*\$[A-z]+(::([A-z]+))*?\s*(?!})
global_usages = re.compile("\{.*\$[A-z]+(::([A-z]+))*?.*\}")
global_pattern = re.compile("(?<!.)\s*$[A-z]+(::([A-z]+))*?")
parameter_split = re.compile("\s*,\s*")
assignment_split = re.compile("\s*=\s*")
_log_lines = None
# Rules for verifying datablock information
_datablock_rules = {
"tracerprojectiledata": {
@ -224,339 +224,339 @@ class TSScraper(object):
"fizzletimems": (lambda x: x >= 0, "Cannot use negative fizzle time!")
}
},
"shapebaseimagedata": {
"references": [ ],
"declared": [ ],
"checks": {
"checks": {
}
},
"itemdata": {
"references": [ ],
"declared": [ ],
"checks": { "pickupradius": (lambda x: x > 0, "Items should have >= 1 pickup radius.")
}
},
"audioprofile": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"simdatablock": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"jeteffectdata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"hovervehicledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
},
"stationfxpersonaldata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"cameradata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"triggerdata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"wheeledvehicledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"tsshapeconstructor": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"bombprojectiledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"stationfxvehicledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"staticshapedata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"decaldata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"repairprojectiledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"explosiondata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"linearprojectiledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"elfprojectiledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"linearflareprojectiledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"sensordata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"forcefieldbaredata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"particledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"particleemitterdata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"playerdata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"turretdata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"turretimagedata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"shockwavedata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"seekerprojectiledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"debrisdata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"grenadeprojectiledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"sniperprojectiledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"sniperprojectiledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"flyingvehicledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"splashdata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"energyprojectiledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"flareprojectiledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"targetprojectiledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"shocklanceprojectiledata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"effectprofile": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"precipitationdata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"commandericondata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"missionmarkerdata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"particleemissiondummydata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"fireballatmospheredata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"audiodescription": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"lightningdata": {
"references": [ ],
"declared": [ ],
"checks": { },
},
"audioenvironment": {
"references": [ ],
"declared": [ ],
"checks": { },
},
}
def __init__(self, target_directories, process_count = 0):
self._process_count = process_count
self._target_directories = target_directories
self._log_lines = [ ]
def get_file_list(self, directory):
output = [ ]
previous_working_directory = os.getcwd()
os.chdir(directory)
for root, dirs, files in os.walk("."):
for filename in files:
for filename in files:
relative_path = os.path.join(root, filename)
if (not os.path.isfile(relative_path)):
continue
absolute_path = os.path.realpath(relative_path)
absolute_path = os.path.realpath(relative_path)
# Only check TS files
name, extension = os.path.splitext(filename)
if (extension != ".cs"):
continue
continue
output.append((absolute_path, relative_path.lower()))
os.chdir(previous_working_directory)
return output
def _parse_stage(self, target_files):
results = None
if (self._process_count > 0):
# Create a list with all the required data for the multi-process
input = [ ]
for target_file in target_files:
input.append((target_file, self.parameter_split, self._combined_pattern))
pool = multiprocessing.Pool(processes=self._process_count)
results = pool.map(scrape_file, input)
else:
@ -564,24 +564,24 @@ class TSScraper(object):
for target_file in target_files:
results.append(scrape_file((target_file, self.parameter_split, self._combined_pattern)))
return results
def _declaration_stage(self, parse_results):
# Entries we've already processed
processed_entries = { }
# For each file entry
known_datablocks = { }
for file in parse_results:
for file in parse_results:
# For each global function
for global_function in file.global_functions:
processed_entries.setdefault(global_function.name, global_function)
# Check for declarations
if (processed_entries[global_function.name] is not global_function):
known_entry = processed_entries[global_function.name]
# Redeclaration with different param count
if (len(known_entry.parameters) != len(global_function.parameters)):
global_function.aliases.append(known_entry)
@ -592,19 +592,19 @@ class TSScraper(object):
global_function.aliases.append(known_entry)
known_entry.aliases.append(global_function)
print("Warning: Global function '%s' redeclared in %s, line %u! (Original declaration in %s, line %u)" % (known_entry.name, global_function.filepath, global_function.line, known_entry.filepath, known_entry.line))
processed_entries = { }
# For each bound function
for bound_type in file.bound_functions.keys():
for bound_function in file.bound_functions[bound_type]:
processed_entries.setdefault(bound_function.type, {})
processed_entries[bound_function.type].setdefault(bound_function.name, bound_function)
# Check for declarations
if (processed_entries[bound_function.type][bound_function.name] is not bound_function):
known_entry = processed_entries[bound_function.type][bound_function.name]
# Redeclaration with different param count
if (len(known_entry.parameters) != len(bound_function.parameters)):
bound_function.aliases.append(known_entry)
@ -615,20 +615,20 @@ class TSScraper(object):
bound_function.aliases.append(known_entry)
known_entry.aliases.append(bound_function)
print("Warning: Bound function '%s::%s' redeclared in %s, line %u! (Original declaration in %s, line %u)" % (known_entry.type, known_entry.name, bound_function.filepath, bound_function.line, known_entry.filepath, known_entry.line))
processed_entries = { }
# For each datablock
for datablock in file.datablocks:
processed_entries.setdefault(datablock.name, datablock)
known_datablocks.setdefault(datablock.name, [])
known_datablocks[datablock.name].append(datablock)
# Check for declarations
if (processed_entries[datablock.name] is not datablock):
known_entry = processed_entries[datablock.name]
# Redeclaration with different parent
if (known_entry.derived != datablock.derived):
known_entry.aliases.append(datablock)
@ -639,10 +639,10 @@ class TSScraper(object):
known_entry.aliases.append(datablock)
datablock.aliases.append(known_entry)
print("Warning: Datablock '%s' redeclared in %s, line %u! (Original declaration in %s, line %u" % (datablock.name, datablock.filepath, datablock.line, known_entry.filepath, known_entry.line))
return known_datablocks
def _inheritance_stage(self, parse_results, datablock_list):
def _inheritance_stage(self, parse_results, datablock_list):
# For each file entry
for file in parse_results:
# For each datablock
@ -651,7 +651,7 @@ class TSScraper(object):
print("Warning: Datablock '%s' derives from non-existent parent '%s'! (Declaration in %s, line %u)" % (datablock.name, datablock.derived, datablock.filepath, datablock.line))
elif (datablock.derived is not None):
datablock.derived = datablock_list[datablock.derived]
def _reference_stage(self, parse_results, datablock_list):
# For each file entry
for file in parse_results:
@ -665,12 +665,12 @@ class TSScraper(object):
else:
if (datablock.properties[reference].lower() not in datablock_list.keys()):
print("Reference Warning: %s Datablock '%s' references '%s' in property '%s', which does not exist! (Declaration in %s, line %u)" % (datablock.type, datablock.name, datablock.properties[reference], reference, datablock.filepath, datablock.line))
# Check each declaration
for declaration in self._datablock_rules[datablock.type]["declared"]:
if (declaration not in datablock.properties):
print("Declaration Warning: %s Datablock '%s' required property '%s' not declared! (Declaration in %s, line %u)" % (datablock.type, datablock.name, declaration, datablock.filepath, datablock.line))
# Run custom checks
for check in self._datablock_rules[datablock.type]["checks"].keys():
# Is it declared?
@ -683,47 +683,44 @@ class TSScraper(object):
else:
print("Program Error: Unknown datablock type '%s'! This means the software does not know how to check this datablock. (Declaration in %s, line %u)" % (datablock.type, datablock.filepath, datablock.line))
def process(self):
result = None
def process(self):
# Process each directory sequentially
target_files = { }
for index, target_directory in enumerate(self._target_directories):
if (os.path.isdir(target_directory) is False):
raise IOError("No such directory to recurse (#%u): '%s'" % (index, target_directory))
print("INFO: Building file list for directory '%s' ..." % target_directory)
current_files = self.get_file_list(target_directory)
# Does a previous entry exist in the target file list?
for current_absolute_path, current_relative_path in current_files:
target_files[current_relative_path] = current_absolute_path
target_directory = self._target_directories
if (os.path.isdir(target_directory) is False):
raise IOError("No such directory to recurse (#%u): '%s'" % (index, target_directory))
print("INFO: Building file list for directory '%s' ..." % target_directory)
current_files = self.get_file_list(target_directory)
# Does a previous entry exist in the target file list?
for current_absolute_path, current_relative_path in current_files:
target_files[current_relative_path] = current_absolute_path
# Build the list now
target_file_list = [ ]
for current_relative_file in target_files.keys():
for current_relative_file in target_files.keys():
target_file_list.append(target_files[current_relative_file])
# Perform the initial parse
print("INFO: Performing parse stage ...")
print("INFO: Performing parse stage ...")
parse_results = self._parse_stage(target_file_list)
# Perform the declaration analysis
print("INFO: Performing declaration analysis. ...")
datablock_list = self._declaration_stage(parse_results)
# Perform DB inheritance analysis
print("INFO: Performing datablock inheritance analysis ...")
self._inheritance_stage(parse_results, datablock_list)
# Perform DB reference analysis
print("INFO: Performing datablock reference analysis ...")
self._reference_stage(parse_results, datablock_list)
# We're done, return the results
print("INFO: Done.")
return result
return { "files": parse_results, "datablocks": datablock_list }