Skip to content
Snippets Groups Projects
Commit 65b0c4be authored by Aurélien Lamercerie's avatar Aurélien Lamercerie
Browse files

New module scorer_batch to process multiple pairs of ontologies

parent 2e696ad5
No related branches found
No related tags found
No related merge requests found
...@@ -2,29 +2,143 @@ ...@@ -2,29 +2,143 @@
# -*-coding:Utf-8 -* # -*-coding:Utf-8 -*
""" """
ontoScorer: [brief description of the module] ontoScorer: Batch evaluation of ontology pairs.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
Detailed module description, if needed This script processes multiple pairs of reference and generated ontologies, computes comparison metrics,
and produces reports for each pair as well as a summary report for all pairs.
""" """
from ontoScorer.scorer import OntoScorer import os
from ontoScorer.scorer_batch import ScorerBatch
def main(): #--------------------------------------------------------------------------
# Paths to data # Parameters
DATA_FOLDER_PATH = "data" #--------------------------------------------------------------------------
REFERENCE_ONTOLOGY_PATH = f"{DATA_FOLDER_PATH}/reference_ontology.ttl"
GENERATED_ONTOLOGY_PATH = f"{DATA_FOLDER_PATH}/generated_ontology.ttl" # Directory containing the ontologies
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_FOLDER_PATH = os.path.join(SCRIPT_DIR, "data")
OUTPUT_FOLDER_PATH = os.path.join(SCRIPT_DIR, "output_reports")
# Equivalent Prefix (used to harmonize the ontologies) # List of ontology pairs (reference, generated) for evaluation.
# Ideally, these paths would be dynamically generated or loaded from some configuration.
ONTOLOGY_PAIRS = [
(f"{DATA_FOLDER_PATH}/reference_ontology_1.ttl", f"{DATA_FOLDER_PATH}/generated_ontology_1.ttl"),
# (f"{DATA_FOLDER_PATH}/reference_ontology2.ttl", f"{DATA_FOLDER_PATH}/generated_ontology2.ttl"),
# ...
]
# Prefixes used to harmonize or align concepts in the ontologies
EQUIVALENT_PREFIX = [ EQUIVALENT_PREFIX = [
("base", "https://reference.tetras-libre.fr/base-ontology#", "https://tenet.tetras-libre.fr/base-ontology#"), ("base", "https://reference.tetras-libre.fr/base-ontology#", "https://tenet.tetras-libre.fr/base-ontology#"),
("result", "https://reference.tetras-libre.fr/expected-result#", "https://tenet.tetras-libre.fr/extract-result#") ("result", "https://reference.tetras-libre.fr/expected-result#", "https://tenet.tetras-libre.fr/extract-result#")
] ]
# Scorer Process Run
scorer = OntoScorer(REFERENCE_ONTOLOGY_PATH, GENERATED_ONTOLOGY_PATH, EQUIVALENT_PREFIX) #--------------------------------------------------------------------------
scorer.compute_metrics() # Useful Method(s)
scorer.generate_report() #--------------------------------------------------------------------------
def common_prefix(strings):
    """Return the longest common prefix of the list of strings.

    Args:
    - strings (list of str): Strings to compare; may be empty.

    Returns:
    - str: Longest shared leading substring ('' for an empty list
      or when no prefix is shared).
    """
    # os.path.commonprefix does a plain character-wise comparison
    # (despite its name it is not path-aware), which is exactly the
    # behavior the hand-rolled loop implemented.
    return os.path.commonprefix(strings)
def common_suffix(strings):
    """Return the longest common suffix of the list of strings.

    Args:
    - strings (list of str): Strings to compare; may be empty.

    Returns:
    - str: Longest shared trailing substring ('' for an empty list
      or when no suffix is shared).
    """
    if not strings:
        return ''
    suffix = strings[0]
    for string in strings:
        # BUG FIX: the original tested string[-len(suffix):] != suffix,
        # which loops forever once suffix shrinks to '' (string[-0:] is
        # the whole string, never equal to '', and ''[1:] stays '').
        # Guarding on a non-empty suffix makes the loop terminate.
        while suffix and not string.endswith(suffix):
            suffix = suffix[1:]
        if not suffix:
            return ''
    return suffix
def build_ontology_pairs(reference_subfolder_name, generated_subfolder_name):
    """
    Build pairs of ontology files from two subfolders based on their filename similarity.

    Files are matched on their "variant" part: what remains of each filename
    after stripping the prefix and suffix shared by every file in its folder.

    Args:
    - reference_subfolder_name (str): Name of the reference ontology subfolder.
    - generated_subfolder_name (str): Name of the generated ontology subfolder.

    Returns:
    - list of tuples: Each tuple contains a pair of file paths (reference, generated).
    """
    reference_subfolder = os.path.join(DATA_FOLDER_PATH, reference_subfolder_name)
    generated_subfolder = os.path.join(DATA_FOLDER_PATH, generated_subfolder_name)
    reference_files = sorted(os.listdir(reference_subfolder))
    generated_files = sorted(os.listdir(generated_subfolder))

    # Identify the common prefix and suffix for each file list; together
    # they delimit the varying ("variant") part of each filename.
    ref_common_prefix = common_prefix(reference_files)
    ref_common_suffix = common_suffix(reference_files)
    gen_common_prefix = common_prefix(generated_files)
    gen_common_suffix = common_suffix(generated_files)

    def _variant(filename, prefix, suffix):
        # BUG FIX: the original sliced with f[len(prefix):-len(suffix)],
        # which collapses to f[x:0] == '' whenever the common suffix is
        # empty, breaking pairing for folders with no shared suffix.
        # An explicit end index handles the empty-suffix case correctly.
        return filename[len(prefix):len(filename) - len(suffix)]

    # Index generated files by variant, keeping the first occurrence
    # (same tie-breaking as the original list.index() lookup).
    gen_by_variant = {}
    for g_file in generated_files:
        gen_by_variant.setdefault(_variant(g_file, gen_common_prefix, gen_common_suffix), g_file)

    # Pair up each reference file with the generated file sharing its variant.
    pairs = []
    for r_file in reference_files:
        r_variant = _variant(r_file, ref_common_prefix, ref_common_suffix)
        g_file = gen_by_variant.get(r_variant)
        if g_file is not None:
            pairs.append((os.path.join(reference_subfolder, r_file),
                          os.path.join(generated_subfolder, g_file)))
    return pairs
#--------------------------------------------------------------------------
# Main Method
#--------------------------------------------------------------------------
def main():
    """Run the batch pipeline: pair ontologies, compute metrics, write reports."""
    # FIX: the original banner string embedded literal quote characters
    # ("'\n === OntoScorer Run === '"), printing stray quotes to the console.
    print("\n === OntoScorer Run === ")

    print("\n" + "-- Initialization")
    print("----- Building ontology pairs...")
    # ontology_pairs = ONTOLOGY_PAIRS
    ontology_pairs = build_ontology_pairs('reference_ontology', 'generated_ontology')
    # FIX: pluralize with "!= 1" so zero pairs also reads "pairs" (was "> 1").
    print(f"----- Found {len(ontology_pairs)} ontology pair{'s' if len(ontology_pairs) != 1 else ''}.")
    print("----- Initializing ScorerBatch...")
    scorer_batch = ScorerBatch(ontology_pairs, EQUIVALENT_PREFIX, OUTPUT_FOLDER_PATH)

    print("\n" + "-- Analyze")
    print("----- Computing metrics for all ontology pairs...")
    scorer_batch.compute_all_metrics()

    print("\n" + "-- Report(s)")
    print("----- Generating individual reports for each ontology pair...")
    scorer_batch.generate_all_reports()
    print("----- Generating summary report...")
    scorer_batch.generate_summary_report()

    print("\n" + "-- Process completed!")
if __name__ == "__main__": if __name__ == "__main__":
main() main()
...@@ -49,6 +49,6 @@ class OntoScorer: ...@@ -49,6 +49,6 @@ class OntoScorer:
def generate_report(self): def generate_report(self):
report = Report(self.reference_ontology, self.generated_ontology, self.metrics) report = Report(self.reference_ontology, self.generated_ontology, self.metrics)
print(report.generate()) return report.generate()
#!/usr/bin/python3.10
# -*-coding:Utf-8 -*-
"""
ontoScorer: Batch Module for comparing multiple pairs of reference and generated ontologies.
------------------------------------------------------------------------------------------
This module facilitates the batch processing of ontology pairs, computing metrics and
generating comparison reports for each pair and a summary report for all.
"""
import os
from ontology import Ontology
from report import Report
from metrics import Metrics
from scorer import OntoScorer
class ScorerBatch:
    """
    Compare multiple pairs of reference and generated ontologies.

    Attributes:
    - ontology_pairs (list): List of tuples, where each tuple contains the paths to the reference and generated ontology.
    - equivalent_prefix: Prefix table passed through to each OntoScorer to harmonize terms.
    - output_dir (str): Path to the directory where the reports will be written.
    - results (dict): Mapping (reference_path, generated_path) -> computed metrics.
    """

    def __init__(self, ontology_pairs, equivalent_prefix, output_dir):
        """
        Initializes the ScorerBatch with a list of ontology pairs and output directory.

        Args:
        - ontology_pairs (list): List of tuples. Each tuple contains paths to the reference and generated ontology.
        - equivalent_prefix: Prefix data to handle equivalent terms or concepts.
        - output_dir (str): Path to the directory where the reports will be written.
        """
        self.ontology_pairs = ontology_pairs
        self.equivalent_prefix = equivalent_prefix
        self.output_dir = output_dir
        self.results = {}

        # Create the output directory if it doesn't exist
        os.makedirs(self.output_dir, exist_ok=True)

    def compute_all_metrics(self):
        """
        Computes metrics for each pair of ontologies and stores them in the results dictionary.
        """
        for ref_onto, gen_onto in self.ontology_pairs:
            scorer = OntoScorer(ref_onto, gen_onto, self.equivalent_prefix)
            scorer.compute_metrics()
            self.results[(ref_onto, gen_onto)] = scorer.metrics

    def generate_all_reports(self):
        """
        Generates and writes a report for each pair of ontologies to the output directory.
        """
        for idx, (ref_onto, gen_onto) in enumerate(self.ontology_pairs):
            scorer = OntoScorer(ref_onto, gen_onto, self.equivalent_prefix)
            # BUG FIX: metrics must be computed before generating the report.
            # The original skipped this call, so every per-pair report was
            # emitted with empty ("NA") metrics.
            scorer.compute_metrics()
            report_content = scorer.generate_report()
            report_filename = os.path.join(self.output_dir, f'report_{idx + 1}.txt')
            # Explicit encoding so output doesn't depend on the platform default.
            with open(report_filename, 'w', encoding='utf-8') as f:
                f.write(report_content)

    def generate_summary_report(self):
        """
        Generates a summary report for all the ontologies and writes it to the output directory.

        Uses the metrics stored by compute_all_metrics(), which must have
        been called first (otherwise the summary body is empty).
        """
        summary_content = "Summary Report for All Ontologies\n"
        summary_content += "=" * 80 + "\n"
        # Iterate the stored metrics dictionary so each pair's report is
        # built from the metrics already computed for it.
        for idx, ((ref_onto, gen_onto), metrics) in enumerate(self.results.items()):
            summary_content += f"Metrics for Ontology Pair {idx + 1}:\n"
            report = Report(ref_onto, gen_onto, metrics)
            summary_content += report.generate() + "\n"
            summary_content += "-" * 80 + "\n"
        summary_filename = os.path.join(self.output_dir, 'summary_report.txt')
        with open(summary_filename, 'w', encoding='utf-8') as f:
            f.write(summary_content)
=== Ontology Evaluation Report ===
Comparing Reference Ontology with Generated Ontology.
== Entities ==
Metrics for Classes:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Metrics for Object properties:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Metrics for Individuals:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Overall Metrics (Synthesis):
Precision: NA
Recall: NA
F1 Score: NA
== Taxonomic relations ==
Metrics for Subclass:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Metrics for Subproperty:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Metrics for Instanciation:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Overall Metrics (Synthesis):
Precision: NA
Recall: NA
F1 Score: NA
== Non taxonomic relations ==
Metrics for Object properties:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Metrics for Data properties:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Overall Metrics (Synthesis):
Precision: NA
Recall: NA
F1 Score: NA
== Axioms ==
Metrics for Restriction axioms:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Overall Metrics (Synthesis):
Precision: NA
Recall: NA
F1 Score: NA
Summary Report for All Ontologies
================================================================================
Metrics for Ontology Pair 1:
=== Ontology Evaluation Report ===
Comparing Reference Ontology with Generated Ontology.
== Entities ==
Metrics for Classes:
Precision: 1.0000
Recall: 0.7500
F1 Score: 0.8571
Total Elements: 12
Matched Elements: 9
Metrics for Object properties:
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000
Total Elements: 8
Matched Elements: 8
Metrics for Individuals:
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000
Total Elements: 1
Matched Elements: 1
Overall Metrics (Synthesis):
Precision: 1.0000
Recall: 0.8571
F1 Score: 0.9231
== Taxonomic relations ==
Metrics for Subclass:
Precision: 0.7000
Recall: 0.5833
F1 Score: 0.6364
Total Elements: 15
Matched Elements: 7
Metrics for Subproperty:
Precision: 1.0000
Recall: 0.8889
F1 Score: 0.9412
Total Elements: 9
Matched Elements: 8
Metrics for Instanciation:
Precision: 0.7500
Recall: 0.6000
F1 Score: 0.6667
Total Elements: 6
Matched Elements: 3
Overall Metrics (Synthesis):
Precision: 0.8182
Recall: 0.6923
F1 Score: 0.7500
== Non taxonomic relations ==
Metrics for Object properties:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Metrics for Data properties:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Overall Metrics (Synthesis):
Precision: NA
Recall: NA
F1 Score: NA
== Axioms ==
Metrics for Restriction axioms:
Precision: 1.0000
Recall: 0.6250
F1 Score: 0.7692
Total Elements: 8
Matched Elements: 5
Overall Metrics (Synthesis):
Precision: 1.0000
Recall: 0.6250
F1 Score: 0.7692
--------------------------------------------------------------------------------
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment