Skip to content
Snippets Groups Projects
Commit 3af96c18 authored by Eliott Sammier's avatar Eliott Sammier
Browse files

Automatic log file comparison between parsers

parent d868282f
No related branches found
No related tags found
No related merge requests found
import filecmp
from pprint import pprint
from typing import Optional
......@@ -122,18 +123,32 @@ def parse_manifest_rec(
extract_mosetp.parse_mosetp(graph, f"{SOURCE_DIR}/sco/{id}.html", id)
import extract_page
def compare_files(f1, f2):
    """Print whether the files at paths *f1* and *f2* have identical contents.

    The comparison reads both files (``shallow=False``). With the default
    shallow mode, ``filecmp.cmp`` treats two files as equal as soon as their
    type, size and modification time match, without looking at the data.
    Debug logs produced during the same run can easily share a size and a
    timestamp while holding different text, which would be misreported as
    "identical".

    Args:
        f1: Path of the first file.
        f2: Path of the second file.

    Raises:
        OSError: Propagated from ``filecmp.cmp`` when a file cannot be
            accessed (for instance if it does not exist).
    """
    verdict = "are identical" if filecmp.cmp(f1, f2, shallow=False) else "differ"
    print("Files {} and {} {}.".format(f1, f2, verdict))
def main():
    """Build and export the graph, then report which parser debug logs match.

    Each activity parser gets its own debug log under ``/tmp``. The logs are
    reset before the manifest is parsed and compared pairwise once the graph
    has been exported.
    """
    g = create_graph()

    # Start every parser's debug log from a known state so that the
    # comparison below only reflects this run.
    # NOTE(review): the names are presumably meant to match the
    # "/tmp/<parser>_debuglog.txt" files appended to by parse_page -- confirm
    # that str(parser) yields "MatchParser", "XpathParser", "RegexParser".
    parsers = ("Match", "Xpath", "Regex")
    logfiles = [f"/tmp/{p}Parser_debuglog.txt" for p in parsers]
    for log_path in logfiles:
        with open(log_path, "w") as log:
            print("", file=log)

    parse_manifest(g)
    export_graph(g)

    # Manual single-page run, kept for debugging:
    # extract_page.parse_page(
    #     g,
    #     f"{SOURCE_DIR}/contenu/pages/pg60.html",
    #     "pg60",
    # )

    # Compare every unordered pair of logs: (0, 1), (0, 2), then (1, 2).
    for position, first in enumerate(logfiles):
        for second in logfiles[position + 1 :]:
            compare_files(first, second)
if __name__ == "__main__":
......
......@@ -301,7 +301,7 @@ def parse_page(graph: Graph, filepath: str, id: str):
# Try different parsers, each writing to a different file to compare their results
for parser in [XpathParser(), MatchParser(graph, id), RegexParser(graph, id)]:
with open(f"/tmp/{str(parser)}.txt", "a") as f:
with open(f"/tmp/{str(parser)}_debuglog.txt", "a") as f:
print(f"{id:8}", end="", file=f)
try:
parser.parse(js, output=f)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment