#--------------------------------------------------------------------------
# Parameters
#--------------------------------------------------------------------------

# Folders are resolved relative to this file so that the script behaves the
# same whatever the current working directory is.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_FOLDER_PATH = os.path.join(SCRIPT_DIR, "data")
OUTPUT_FOLDER_PATH = os.path.join(SCRIPT_DIR, "output_reports")

# Explicit list of ontology pairs (reference, generated) for evaluation.
# It is an alternative to build_ontology_pairs(), which discovers the pairs
# from the content of the two data subfolders. Add one tuple per pair, e.g.
# (".../reference_ontology/reference_ontology_2.ttl",
#  ".../generated_ontology/generated_ontology_2.ttl").
ONTOLOGY_PAIRS = [
    (os.path.join(DATA_FOLDER_PATH, "reference_ontology", "reference_ontology_1.ttl"),
     os.path.join(DATA_FOLDER_PATH, "generated_ontology", "generated_ontology_1.ttl")),
]

# Prefixes used to harmonize or align concepts in the ontologies.
# Each entry is (prefix name, reference namespace, generated namespace).
EQUIVALENT_PREFIX = [
    ("base", "https://reference.tetras-libre.fr/base-ontology#", "https://tenet.tetras-libre.fr/base-ontology#"),
    ("result", "https://reference.tetras-libre.fr/expected-result#", "https://tenet.tetras-libre.fr/extract-result#")
]


#--------------------------------------------------------------------------
# Useful Method(s)
#--------------------------------------------------------------------------

def common_prefix(strings):
    """ Return the longest common prefix of the list of strings.

    Returns '' when the list is empty or when the strings share no prefix.
    """
    if not strings:
        return ''
    # os.path.commonprefix compares character by character (it is not
    # path-aware), which is exactly the behaviour needed here.
    return os.path.commonprefix(list(strings))


def common_suffix(strings):
    """ Return the longest common suffix of the list of strings.

    Returns '' when the list is empty or when the strings share no suffix.
    """
    if not strings:
        return ''
    # A common suffix is the common prefix of the reversed strings. This
    # always terminates, including when nothing is shared.
    reversed_strings = [string[::-1] for string in strings]
    return os.path.commonprefix(reversed_strings)[::-1]


def _list_files(folder):
    """ Return the sorted names of the regular files directly inside folder. """
    return sorted(
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
    )


def _variant_parts(filenames):
    """ Return, for each filename, the part left once the prefix and suffix
    shared by all filenames have been removed. """
    prefix_length = len(common_prefix(filenames))
    suffix_length = len(common_suffix(filenames))
    # Use an explicit end index: name[start:-0] would be empty for every
    # name whenever the filenames share no suffix.
    return [name[prefix_length:len(name) - suffix_length] for name in filenames]


def build_ontology_pairs(reference_subfolder_name, generated_subfolder_name,
                         data_folder_path=None):
    """
    Build pairs of ontology files from two subfolders based on their filename similarity.

    Within each subfolder, the prefix and suffix shared by all filenames are
    stripped; files whose remaining (varying) part is identical in both
    subfolders are paired. For example 'reference_ontology_1.ttl' is paired
    with 'generated_ontology_1.ttl' when both folders number their files the
    same way. When a subfolder holds a single file, its varying part is empty,
    so it is only paired if the other subfolder also holds a single file.

    Args:
    - reference_subfolder_name (str): Name of the reference ontology subfolder.
    - generated_subfolder_name (str): Name of the generated ontology subfolder.
    - data_folder_path (str, optional): Folder containing both subfolders.
      Defaults to DATA_FOLDER_PATH.

    Returns:
    - list of tuples: Each tuple contains a pair of file paths (reference, generated),
      ordered by reference filename.

    Raises:
    - OSError: If one of the subfolders cannot be listed (e.g. it does not exist).
    """
    if data_folder_path is None:
        data_folder_path = DATA_FOLDER_PATH

    reference_subfolder = os.path.join(data_folder_path, reference_subfolder_name)
    generated_subfolder = os.path.join(data_folder_path, generated_subfolder_name)

    reference_files = _list_files(reference_subfolder)
    generated_files = _list_files(generated_subfolder)

    # Index the generated files by their varying part. setdefault keeps the
    # first file when two files end up with the same variant.
    generated_by_variant = {}
    for variant, filename in zip(_variant_parts(generated_files), generated_files):
        generated_by_variant.setdefault(variant, filename)

    pairs = []
    for variant, r_file in zip(_variant_parts(reference_files), reference_files):
        g_file = generated_by_variant.get(variant)
        if g_file is not None:
            pairs.append((os.path.join(reference_subfolder, r_file),
                          os.path.join(generated_subfolder, g_file)))

    return pairs


#--------------------------------------------------------------------------
# Main Method
#--------------------------------------------------------------------------

def main():
    """ Run the batch evaluation: build the ontology pairs, compute the
    metrics, then write the individual reports and the summary report. """
    print("\n === OntoScorer Run === ")

    print("\n-- Initialization")

    print("----- Building ontology pairs...")
    # ONTOLOGY_PAIRS can be used here instead to evaluate an explicit list.
    ontology_pairs = build_ontology_pairs('reference_ontology', 'generated_ontology')

    pair_count = len(ontology_pairs)
    print(f"----- Found {pair_count} ontology pair{'' if pair_count == 1 else 's'}.")

    print("----- Initializing ScorerBatch...")
    scorer_batch = ScorerBatch(ontology_pairs, EQUIVALENT_PREFIX, OUTPUT_FOLDER_PATH)

    print("\n-- Analyze")

    print("----- Computing metrics for all ontology pairs...")
    scorer_batch.compute_all_metrics()

    print("\n-- Report(s)")

    print("----- Generating individual reports for each ontology pair...")
    scorer_batch.generate_all_reports()

    print("----- Generating summary report...")
    scorer_batch.generate_summary_report()

    print("\n-- Process completed!")


if __name__ == "__main__":
    main()
class ScorerBatch:
    """
    The ScorerBatch class is used to compare multiple pairs of reference and generated ontologies.

    Attributes:
    - ontology_pairs (list): List of tuples, where each tuple contains the paths to the reference and generated ontology.
    - equivalent_prefix (list): Prefix equivalences forwarded unchanged to each OntoScorer.
    - output_dir (str): Path to the directory where the reports will be written.
    - results (dict): Maps each (reference, generated) pair to the metrics computed for it.
    """

    def __init__(self, ontology_pairs, equivalent_prefix, output_dir):
        """
        Initializes the ScorerBatch with a list of ontology pairs and output directory.

        Args:
        - ontology_pairs (list): List of tuples. Each tuple contains paths to the reference and generated ontology.
        - equivalent_prefix (list): Prefix equivalences passed as-is to OntoScorer for every pair.
        - output_dir (str): Path to the directory where the reports will be written.
        """
        self.ontology_pairs = ontology_pairs
        self.equivalent_prefix = equivalent_prefix
        self.output_dir = output_dir
        self.results = {}

        # Scorers that already went through compute_metrics(), keyed like
        # self.results, so that reports are built from real metrics.
        self._scorers = {}

        # Create the output directory if it doesn't exist
        os.makedirs(self.output_dir, exist_ok=True)


    def _score_pair(self, ref_onto, gen_onto):
        """
        Computes the metrics of one ontology pair, records them and returns the scorer.
        """
        scorer = OntoScorer(ref_onto, gen_onto, self.equivalent_prefix)
        scorer.compute_metrics()
        self._scorers[(ref_onto, gen_onto)] = scorer
        self.results[(ref_onto, gen_onto)] = scorer.metrics
        return scorer


    def compute_all_metrics(self):
        """
        Computes metrics for each pair of ontologies and stores them in the results dictionary.
        """
        for ref_onto, gen_onto in self.ontology_pairs:
            self._score_pair(ref_onto, gen_onto)


    def generate_all_reports(self):
        """
        Generates and writes a report for each pair of ontologies to the output directory.

        Reports are named 'report_<n>.txt', where <n> is the 1-based position of the
        pair in ontology_pairs. Metrics computed by compute_all_metrics() are reused;
        a pair that has not been evaluated yet is evaluated on the fly, so a report
        is never generated from a scorer whose metrics were not computed.
        """
        for idx, (ref_onto, gen_onto) in enumerate(self.ontology_pairs, start=1):
            scorer = self._scorers.get((ref_onto, gen_onto))
            if scorer is None:
                scorer = self._score_pair(ref_onto, gen_onto)
            report_content = scorer.generate_report()

            report_filename = os.path.join(self.output_dir, f'report_{idx}.txt')
            with open(report_filename, 'w', encoding='utf-8') as f:
                f.write(report_content)


    def generate_summary_report(self):
        """
        Generates a summary report for all the ontologies and writes it to the output directory.

        The summary ('summary_report.txt') contains one section per entry of the
        results dictionary, i.e. per pair whose metrics have been computed.
        """
        sections = [
            "Summary Report for All Ontologies\n",
            "=" * 80 + "\n",
        ]

        for idx, ((ref_onto, gen_onto), metrics) in enumerate(self.results.items(), start=1):
            report = Report(ref_onto, gen_onto, metrics)
            sections.append(f"Metrics for Ontology Pair {idx}:\n")
            sections.append(report.generate() + "\n")
            sections.append("-" * 80 + "\n")

        summary_filename = os.path.join(self.output_dir, 'summary_report.txt')
        with open(summary_filename, 'w', encoding='utf-8') as f:
            f.write("".join(sections))
+ + +== Entities == + +Metrics for Classes: +Precision: NA +Recall: NA +F1 Score: NA +Total Elements: 0 +Matched Elements: 0 + +Metrics for Object properties: +Precision: NA +Recall: NA +F1 Score: NA +Total Elements: 0 +Matched Elements: 0 + +Metrics for Individuals: +Precision: NA +Recall: NA +F1 Score: NA +Total Elements: 0 +Matched Elements: 0 + +Overall Metrics (Synthesis): +Precision: NA +Recall: NA +F1 Score: NA + + +== Taxonomic relations == + +Metrics for Subclass: +Precision: NA +Recall: NA +F1 Score: NA +Total Elements: 0 +Matched Elements: 0 + +Metrics for Subproperty: +Precision: NA +Recall: NA +F1 Score: NA +Total Elements: 0 +Matched Elements: 0 + +Metrics for Instanciation: +Precision: NA +Recall: NA +F1 Score: NA +Total Elements: 0 +Matched Elements: 0 + +Overall Metrics (Synthesis): +Precision: NA +Recall: NA +F1 Score: NA + + +== Non taxonomic relations == + +Metrics for Object properties: +Precision: NA +Recall: NA +F1 Score: NA +Total Elements: 0 +Matched Elements: 0 + +Metrics for Data properties: +Precision: NA +Recall: NA +F1 Score: NA +Total Elements: 0 +Matched Elements: 0 + +Overall Metrics (Synthesis): +Precision: NA +Recall: NA +F1 Score: NA + + +== Axioms == + +Metrics for Restriction axioms: +Precision: NA +Recall: NA +F1 Score: NA +Total Elements: 0 +Matched Elements: 0 + +Overall Metrics (Synthesis): +Precision: NA +Recall: NA +F1 Score: NA diff --git a/output_reports/summary_report.txt b/output_reports/summary_report.txt new file mode 100644 index 0000000000000000000000000000000000000000..a8b14d794dae388fc481c1b044dbfd4848aef17d --- /dev/null +++ b/output_reports/summary_report.txt @@ -0,0 +1,103 @@ +Summary Report for All Ontologies +================================================================================ +Metrics for Ontology Pair 1: +=== Ontology Evaluation Report === + +Comparing Reference Ontology with Generated Ontology. 
+ + +== Entities == + +Metrics for Classes: +Precision: 1.0000 +Recall: 0.7500 +F1 Score: 0.8571 +Total Elements: 12 +Matched Elements: 9 + +Metrics for Object properties: +Precision: 1.0000 +Recall: 1.0000 +F1 Score: 1.0000 +Total Elements: 8 +Matched Elements: 8 + +Metrics for Individuals: +Precision: 1.0000 +Recall: 1.0000 +F1 Score: 1.0000 +Total Elements: 1 +Matched Elements: 1 + +Overall Metrics (Synthesis): +Precision: 1.0000 +Recall: 0.8571 +F1 Score: 0.9231 + + +== Taxonomic relations == + +Metrics for Subclass: +Precision: 0.7000 +Recall: 0.5833 +F1 Score: 0.6364 +Total Elements: 15 +Matched Elements: 7 + +Metrics for Subproperty: +Precision: 1.0000 +Recall: 0.8889 +F1 Score: 0.9412 +Total Elements: 9 +Matched Elements: 8 + +Metrics for Instanciation: +Precision: 0.7500 +Recall: 0.6000 +F1 Score: 0.6667 +Total Elements: 6 +Matched Elements: 3 + +Overall Metrics (Synthesis): +Precision: 0.8182 +Recall: 0.6923 +F1 Score: 0.7500 + + +== Non taxonomic relations == + +Metrics for Object properties: +Precision: NA +Recall: NA +F1 Score: NA +Total Elements: 0 +Matched Elements: 0 + +Metrics for Data properties: +Precision: NA +Recall: NA +F1 Score: NA +Total Elements: 0 +Matched Elements: 0 + +Overall Metrics (Synthesis): +Precision: NA +Recall: NA +F1 Score: NA + + +== Axioms == + +Metrics for Restriction axioms: +Precision: 1.0000 +Recall: 0.6250 +F1 Score: 0.7692 +Total Elements: 8 +Matched Elements: 5 + +Overall Metrics (Synthesis): +Precision: 1.0000 +Recall: 0.6250 +F1 Score: 0.7692 + +--------------------------------------------------------------------------------