TS Scraper | {{ file }}

commit c149d6bad8a0ffd33b21b9b21b368756f891eb7b Author: Robert MacGregor Date: Wed Jul 15 02:40:12 2015 -0400 Initial commit. diff --git a/data/filetempl.html b/data/filetempl.html new file mode 100644 index 0000000..90bfd24 --- /dev/null +++ b/data/filetempl.html @@ -0,0 +1,15 @@ + + + TS Scraper | {{ file }} + + + + + Globals:
+ + {% for global in globals %} + {{ global.name }}
+ {% endfor %} + + + \ No newline at end of file diff --git a/data/indextempl.html b/data/indextempl.html new file mode 100644 index 0000000..74ea2f5 --- /dev/null +++ b/data/indextempl.html @@ -0,0 +1,24 @@ + + + TS Scraper | Index + + + + + + TS Scraper + +

+ + File Index

+ + {% for file in files %} + {{ file.path }}
+ {% endfor %} + +
+ This file was auto-generated by TS Scraper.
+ TS Scraper is written in Python 2 by Robert MacGregor. + + + diff --git a/exporters/__init__.py b/exporters/__init__.py new file mode 100644 index 0000000..8d1c8b6 --- /dev/null +++ b/exporters/__init__.py @@ -0,0 +1 @@ + diff --git a/exporters/doku.py b/exporters/doku.py new file mode 100644 index 0000000..f7913b3 --- /dev/null +++ b/exporters/doku.py @@ -0,0 +1,64 @@ +import re +import os +import sys +import importlib +import os.path + +class Exporter(object): + data = None + + def __init__(self, data): + self.data = data + + def write(self): + with open("Out.txt", "w") as handle: + # Write the header + handle.write("====== Test ======\n\n") + + # For each file entry... + for file in self.data: + if (len(file.global_functions) == 0 and len(file.bound_functions.keys()) == 0 and len(file.datablocks) == 0): + continue + + # Calculate the total entry count + entry_count = len(file.global_functions) + len(file.datablocks) + for type in file.bound_functions.keys(): + entry_count = entry_count + len(file.bound_functions[type]) + + handle.write("===== Entries in %s (%u total) =====\n\n" % (file.path, entry_count)) + handle.write("===== Global Functions (%u total) =====\n\n" % len(file.global_functions)) + + # For each global function... + for function in file.global_functions: + handle.write("==== %s ====\n" % function.name) + handle.write("File (line %u): %s\n\n" % (function.line, file.path)) + + if (len(function.parameters) != 0): + handle.write("Parameters (in order):\n") + + for parameter in function.parameters: + handle.write(" * %s\n" % parameter) + else: + handle.write("Parameters: None\n") + + handle.write("\n") + + # For each known type... + for type in file.bound_functions.keys(): + handle.write("===== Bound Functions on %s (%u total) =====\n\n" % (type, len(file.bound_functions[type]))) + # For each function for this type... + for function in file.bound_functions[type]: + handle.write("==== %s::%s ====\n" % (function.type, function.name)) + handle.write("File (line %u): %s\n\n" % (function.line, file.path)) + + if (len(function.parameters) != 0): + handle.write("Parameters (in order):\n") + + for parameter in function.parameters: + handle.write(" * %s\n" % parameter) + else: + handle.write("Parameters: None\n") + + handle.write("\n") + + print("Done processing.") \ No newline at end of file diff --git a/exporters/html.py b/exporters/html.py new file mode 100644 index 0000000..28d1397 --- /dev/null +++ b/exporters/html.py @@ -0,0 +1,46 @@ +import re +import os +import sys +import importlib +import os.path + +class Exporter(object): + data = None + + def __init__(self, data): + self.data = data + + def write(self): + import jinja2 + + # Read the template files first + file_template = None + with open("data/filetempl.html", "r") as handle: + file_template = handle.read() + + index_template = None + with open("data/indextempl.html", "r") as handle: + index_template = handle.read() + + html_filenames = [ ] + # For each file entry... + for file in self.data: + if (len(file.global_functions) == 0 and len(file.bound_functions.keys()) == 0 and len(file.datablocks) == 0): + continue + + html_filename = file.path.lstrip("./").replace("/", "-") + html_filename, oldextension = os.path.splitext(html_filename) + html_filename = "%s.html" % html_filename + html_filenames.append(html_filename) + + with open(html_filename, "w") as handle: + template = jinja2.Template(file_template) + handle.write(template.render(file=file.path, globals=file.global_functions)) + + # Dump the index file + with open("index.html", "w") as handle: + template = jinja2.Template(index_template) + handle.write(template.render(files=self.data)) + + print("Done processing.") + diff --git a/scriptScrape.py b/scriptScrape.py new file mode 100644 index 0000000..d81a9b4 --- /dev/null +++ b/scriptScrape.py @@ -0,0 +1,179 @@ +import re +import os +import sys +import importlib +import os.path + +class FileEntry(object): + path = None + global_functions = None + bound_functions = None + datablocks = None + + def __init__(self, path): + self.path = path + self.global_functions = [ ] + self.bound_functions = { } + self.datablocks = [ ] + +class Function(object): + name = None + parameters = None + type = None + filepath = None + line = None + + def __init__(self, name, type, parameters, filepath, line): + self.name = name + self.parameters = parameters + self.filepath = filepath + self.line = line + self.type = type + +class Datablock(object): + name = None + type = None + derived = None + + def __init__(self, name, type, derived): + self.name = name + self.type = type + self.derived = derived + +class Application(object): + bound_function_pattern = re.compile("function +(([A-z]|_)+::)([A-z]|_)+\( *(%[A-z]+( *, *%[A-z]+)*)* *\)") + function_pattern = re.compile("function +([A-z]|_)+\( *(%[A-z]+( *, *%[A-z]+)*)* *\)") + datablock_pattern = re.compile("datablock +[A-z]+ *( *[A-z]+ *)( *: *[A-z]+)?") + + def print_usage(self): + print("Usage: '%s '" % sys.argv[0]) + print("Or: '%s exporters' for a list of known exporters." % sys.argv[0]) + + def main(self): + # Load exporters + exporters = { } + for root, dirs, files in os.walk("exporters"): + for filename in files: + module_name, extension = os.path.splitext(filename) + + if (module_name == "__init__"): + continue + + try: + module = importlib.import_module('exporters.%s' % (module_name)) + exporters[module_name] = module + except ImportError as e: + print(e) + + if (len(sys.argv) < 2): + self.print_usage() + return + + if (sys.argv[1] == "exporters"): + print("Available Exporters: ") + + for exporter in exporters.keys(): + print("\t- %s" % exporter) + return + elif(len(sys.argv) != 3): + self.print_usage() + return + + exporter = None + try: + exporter = exporters[sys.argv[2]] + except KeyError as e: + print("Error: No such exporter '%s'." % sys.argv[2]) + self.print_usage() + return + + results = [ ] + global_aliases = { } + typed_aliases = { } + for root, dirs, files in os.walk(sys.argv[1]): + for filename in files: + filepath = os.path.join(root, filename) + + if (not os.path.isfile(filepath)): + continue + + with open(filepath, "r") as handle: + file_entry = FileEntry(filepath) + + file_data = handle.read() + + # Grab Global function definitions + for match in re.finditer(self.function_pattern, file_data): + line = file_data[0:match.start()].count("\n") + 1 + match_split = match.group(0).lstrip("function ").split("(") + name = match_split[0] + + match_split = match_split[1].replace(")", "").split(",") + + parameters = [ ] + for parameter in match_split: + if (parameter == ""): + continue + + parameters.append(parameter.lstrip().rstrip()) + + file_entry.global_functions.append(Function(name, None, parameters, filepath, line)) + + tracked_name = name.lower() + global_aliases.setdefault(tracked_name, (0, filepath, line)) + + occurrence_count, old_filepath, old_line = global_aliases[tracked_name] + occurrence_count = occurrence_count + 1 + global_aliases[tracked_name] = (occurrence_count, old_filepath, old_line) + + if (occurrence_count != 1): + print("Warning: Found a multiple declaration of global function '%s' in %s, line %u! (Original detection: %s, line %u)" % (tracked_name, filepath, line, old_filepath, old_line)) + + + # Grab bound function definitions + for match in re.finditer(self.bound_function_pattern, file_data): + line = file_data[0:match.start()].count("\n") + 1 + + match_split = match.group(0).lstrip("function ").split("::") + type = match_split[0] + + match_split = match_split[1].split("(") + name = match_split[0] + match_split = match_split[1].replace(")", "").split(",") + + parameters = [ ] + for parameter in match_split: + if (parameter == ""): + continue + parameters.append(parameter.lstrip().rstrip()) + + file_entry.bound_functions.setdefault(type, []) + file_entry.bound_functions[type].append(Function(name, type, parameters, filepath, line)) + + tracked_name = name.lower() + tracked_type = type.lower() + typed_aliases.setdefault(tracked_type, {}) + typed_aliases[tracked_type].setdefault(tracked_name, (0, filepath, line)) + + occurrence_count, old_filepath, old_line = typed_aliases[tracked_type][tracked_name] + occurrence_count = occurrence_count + 1 + typed_aliases[tracked_type][tracked_name] = (occurrence_count, old_filepath, old_line) + + if (occurrence_count != 1): + print("Warning: Found a multiple declaration of bound function '%s::%s' in %s, line %u! (Original detection: %s, line %u)" % (tracked_type, tracked_name, filepath, line, old_filepath, old_line)) + + # Grab DB definitions + for match in re.finditer(self.datablock_pattern, file_data): + match_text = match.group(0).lstrip("datablock ") + + #print(match_text) + + # Stick in results + results.append(file_entry) + + # Init the DokuOutput + output = exporter.Exporter(results) + output.write() + +if __name__ == "__main__": + Application().main()