Skip to content
Snippets Groups Projects
Commit 65b0c4be authored by Aurélien Lamercerie's avatar Aurélien Lamercerie
Browse files

New module scorer_batch to process multiple pairs of ontologies

parent 2e696ad5
No related branches found
No related tags found
No related merge requests found
...@@ -2,29 +2,143 @@ ...@@ -2,29 +2,143 @@
# -*-coding:Utf-8 -* # -*-coding:Utf-8 -*
""" """
ontoScorer: [brief description of the module] ontoScorer: Batch evaluation of ontology pairs.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
Detailed module description, if needed This script processes multiple pairs of reference and generated ontologies, computes comparison metrics,
and produces reports for each pair as well as a summary report for all pairs.
""" """
from ontoScorer.scorer import OntoScorer import os
from ontoScorer.scorer_batch import ScorerBatch
def main(): #--------------------------------------------------------------------------
# Paths to data # Parameters
DATA_FOLDER_PATH = "data" #--------------------------------------------------------------------------
REFERENCE_ONTOLOGY_PATH = f"{DATA_FOLDER_PATH}/reference_ontology.ttl"
GENERATED_ONTOLOGY_PATH = f"{DATA_FOLDER_PATH}/generated_ontology.ttl" # Directory containing the ontologies
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_FOLDER_PATH = os.path.join(SCRIPT_DIR, "data")
OUTPUT_FOLDER_PATH = os.path.join(SCRIPT_DIR, "output_reports")
# Equivalent Prefix (used to harmonize the ontologies) # List of ontology pairs (reference, generated) for evaluation.
# Ideally, these paths would be dynamically generated or loaded from some configuration.
ONTOLOGY_PAIRS = [
(f"{DATA_FOLDER_PATH}/reference_ontology_1.ttl", f"{DATA_FOLDER_PATH}/generated_ontology_1.ttl"),
# (f"{DATA_FOLDER_PATH}/reference_ontology2.ttl", f"{DATA_FOLDER_PATH}/generated_ontology2.ttl"),
# ...
]
# Prefixes used to harmonize or align concepts in the ontologies
EQUIVALENT_PREFIX = [ EQUIVALENT_PREFIX = [
("base", "https://reference.tetras-libre.fr/base-ontology#", "https://tenet.tetras-libre.fr/base-ontology#"), ("base", "https://reference.tetras-libre.fr/base-ontology#", "https://tenet.tetras-libre.fr/base-ontology#"),
("result", "https://reference.tetras-libre.fr/expected-result#", "https://tenet.tetras-libre.fr/extract-result#") ("result", "https://reference.tetras-libre.fr/expected-result#", "https://tenet.tetras-libre.fr/extract-result#")
] ]
# Scorer Process Run
scorer = OntoScorer(REFERENCE_ONTOLOGY_PATH, GENERATED_ONTOLOGY_PATH, EQUIVALENT_PREFIX) #--------------------------------------------------------------------------
scorer.compute_metrics() # Useful Method(s)
scorer.generate_report() #--------------------------------------------------------------------------
def common_prefix(strings):
    """Return the longest common prefix of the list of strings.

    Args:
    - strings (list of str): Strings to compare; may be empty.

    Returns:
    - str: Longest shared leading substring ('' for an empty list
      or when no prefix is shared).
    """
    # os.path.commonprefix does a plain character-wise comparison
    # (despite its name it is not path-aware), which is exactly the
    # behavior the hand-rolled loop implemented.
    return os.path.commonprefix(strings)
def common_suffix(strings):
    """Return the longest common suffix of the list of strings.

    Args:
    - strings (list of str): Strings to compare; may be empty.

    Returns:
    - str: Longest shared trailing substring ('' for an empty list
      or when no suffix is shared).
    """
    if not strings:
        return ''
    suffix = strings[0]
    for string in strings:
        # BUG FIX: the original tested string[-len(suffix):] != suffix,
        # which loops forever once suffix shrinks to '' (string[-0:] is
        # the whole string, never equal to '', and ''[1:] stays '').
        # Guarding on a non-empty suffix makes the loop terminate.
        while suffix and not string.endswith(suffix):
            suffix = suffix[1:]
        if not suffix:
            return ''
    return suffix
def build_ontology_pairs(reference_subfolder_name, generated_subfolder_name):
    """
    Build pairs of ontology files from two subfolders based on their filename similarity.

    Files are matched on their "variant" part: what remains of each filename
    after stripping the prefix and suffix shared by every file in its folder.

    Args:
    - reference_subfolder_name (str): Name of the reference ontology subfolder.
    - generated_subfolder_name (str): Name of the generated ontology subfolder.

    Returns:
    - list of tuples: Each tuple contains a pair of file paths (reference, generated).
    """
    reference_subfolder = os.path.join(DATA_FOLDER_PATH, reference_subfolder_name)
    generated_subfolder = os.path.join(DATA_FOLDER_PATH, generated_subfolder_name)
    reference_files = sorted(os.listdir(reference_subfolder))
    generated_files = sorted(os.listdir(generated_subfolder))

    # Identify the common prefix and suffix for each file list; together
    # they delimit the varying ("variant") part of each filename.
    ref_common_prefix = common_prefix(reference_files)
    ref_common_suffix = common_suffix(reference_files)
    gen_common_prefix = common_prefix(generated_files)
    gen_common_suffix = common_suffix(generated_files)

    def _variant(filename, prefix, suffix):
        # BUG FIX: the original sliced with f[len(prefix):-len(suffix)],
        # which collapses to f[x:0] == '' whenever the common suffix is
        # empty, breaking pairing for folders with no shared suffix.
        # An explicit end index handles the empty-suffix case correctly.
        return filename[len(prefix):len(filename) - len(suffix)]

    # Index generated files by variant, keeping the first occurrence
    # (same tie-breaking as the original list.index() lookup).
    gen_by_variant = {}
    for g_file in generated_files:
        gen_by_variant.setdefault(_variant(g_file, gen_common_prefix, gen_common_suffix), g_file)

    # Pair up each reference file with the generated file sharing its variant.
    pairs = []
    for r_file in reference_files:
        r_variant = _variant(r_file, ref_common_prefix, ref_common_suffix)
        g_file = gen_by_variant.get(r_variant)
        if g_file is not None:
            pairs.append((os.path.join(reference_subfolder, r_file),
                          os.path.join(generated_subfolder, g_file)))
    return pairs
#--------------------------------------------------------------------------
# Main Method
#--------------------------------------------------------------------------
def main():
    """Run the batch pipeline: pair ontologies, compute metrics, write reports."""
    # FIX: the original banner string embedded literal quote characters
    # ("'\n === OntoScorer Run === '"), printing stray quotes to the console.
    print("\n === OntoScorer Run === ")

    print("\n" + "-- Initialization")
    print("----- Building ontology pairs...")
    # ontology_pairs = ONTOLOGY_PAIRS
    ontology_pairs = build_ontology_pairs('reference_ontology', 'generated_ontology')
    # FIX: pluralize with "!= 1" so zero pairs also reads "pairs" (was "> 1").
    print(f"----- Found {len(ontology_pairs)} ontology pair{'s' if len(ontology_pairs) != 1 else ''}.")
    print("----- Initializing ScorerBatch...")
    scorer_batch = ScorerBatch(ontology_pairs, EQUIVALENT_PREFIX, OUTPUT_FOLDER_PATH)

    print("\n" + "-- Analyze")
    print("----- Computing metrics for all ontology pairs...")
    scorer_batch.compute_all_metrics()

    print("\n" + "-- Report(s)")
    print("----- Generating individual reports for each ontology pair...")
    scorer_batch.generate_all_reports()
    print("----- Generating summary report...")
    scorer_batch.generate_summary_report()

    print("\n" + "-- Process completed!")
if __name__ == "__main__": if __name__ == "__main__":
main() main()
...@@ -49,6 +49,6 @@ class OntoScorer: ...@@ -49,6 +49,6 @@ class OntoScorer:
def generate_report(self): def generate_report(self):
report = Report(self.reference_ontology, self.generated_ontology, self.metrics) report = Report(self.reference_ontology, self.generated_ontology, self.metrics)
print(report.generate()) return report.generate()
#!/usr/bin/python3.10
# -*-coding:Utf-8 -*-
"""
ontoScorer: Batch Module for comparing multiple pairs of reference and generated ontologies.
------------------------------------------------------------------------------------------
This module facilitates the batch processing of ontology pairs, computing metrics and
generating comparison reports for each pair and a summary report for all.
"""
import os
from ontology import Ontology
from report import Report
from metrics import Metrics
from scorer import OntoScorer
class ScorerBatch:
    """
    Compare multiple pairs of reference and generated ontologies.

    Attributes:
    - ontology_pairs (list): List of tuples, where each tuple contains the paths to the reference and generated ontology.
    - equivalent_prefix: Prefix table passed through to each OntoScorer to harmonize terms.
    - output_dir (str): Path to the directory where the reports will be written.
    - results (dict): Mapping (reference_path, generated_path) -> computed metrics.
    """

    def __init__(self, ontology_pairs, equivalent_prefix, output_dir):
        """
        Initializes the ScorerBatch with a list of ontology pairs and output directory.

        Args:
        - ontology_pairs (list): List of tuples. Each tuple contains paths to the reference and generated ontology.
        - equivalent_prefix: Prefix data to handle equivalent terms or concepts.
        - output_dir (str): Path to the directory where the reports will be written.
        """
        self.ontology_pairs = ontology_pairs
        self.equivalent_prefix = equivalent_prefix
        self.output_dir = output_dir
        self.results = {}

        # Create the output directory if it doesn't exist
        os.makedirs(self.output_dir, exist_ok=True)

    def compute_all_metrics(self):
        """
        Computes metrics for each pair of ontologies and stores them in the results dictionary.
        """
        for ref_onto, gen_onto in self.ontology_pairs:
            scorer = OntoScorer(ref_onto, gen_onto, self.equivalent_prefix)
            scorer.compute_metrics()
            self.results[(ref_onto, gen_onto)] = scorer.metrics

    def generate_all_reports(self):
        """
        Generates and writes a report for each pair of ontologies to the output directory.
        """
        for idx, (ref_onto, gen_onto) in enumerate(self.ontology_pairs):
            scorer = OntoScorer(ref_onto, gen_onto, self.equivalent_prefix)
            # BUG FIX: metrics must be computed before generating the report.
            # The original skipped this call, so every per-pair report was
            # emitted with empty ("NA") metrics.
            scorer.compute_metrics()
            report_content = scorer.generate_report()
            report_filename = os.path.join(self.output_dir, f'report_{idx + 1}.txt')
            # Explicit encoding so output doesn't depend on the platform default.
            with open(report_filename, 'w', encoding='utf-8') as f:
                f.write(report_content)

    def generate_summary_report(self):
        """
        Generates a summary report for all the ontologies and writes it to the output directory.

        Uses the metrics stored by compute_all_metrics(), which must have
        been called first (otherwise the summary body is empty).
        """
        summary_content = "Summary Report for All Ontologies\n"
        summary_content += "=" * 80 + "\n"
        # Iterate the stored metrics dictionary so each pair's report is
        # built from the metrics already computed for it.
        for idx, ((ref_onto, gen_onto), metrics) in enumerate(self.results.items()):
            summary_content += f"Metrics for Ontology Pair {idx + 1}:\n"
            report = Report(ref_onto, gen_onto, metrics)
            summary_content += report.generate() + "\n"
            summary_content += "-" * 80 + "\n"
        summary_filename = os.path.join(self.output_dir, 'summary_report.txt')
        with open(summary_filename, 'w', encoding='utf-8') as f:
            f.write(summary_content)
=== Ontology Evaluation Report ===
Comparing Reference Ontology with Generated Ontology.
== Entities ==
Metrics for Classes:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Metrics for Object properties:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Metrics for Individuals:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Overall Metrics (Synthesis):
Precision: NA
Recall: NA
F1 Score: NA
== Taxonomic relations ==
Metrics for Subclass:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Metrics for Subproperty:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Metrics for Instanciation:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Overall Metrics (Synthesis):
Precision: NA
Recall: NA
F1 Score: NA
== Non taxonomic relations ==
Metrics for Object properties:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Metrics for Data properties:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Overall Metrics (Synthesis):
Precision: NA
Recall: NA
F1 Score: NA
== Axioms ==
Metrics for Restriction axioms:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Overall Metrics (Synthesis):
Precision: NA
Recall: NA
F1 Score: NA
Summary Report for All Ontologies
================================================================================
Metrics for Ontology Pair 1:
=== Ontology Evaluation Report ===
Comparing Reference Ontology with Generated Ontology.
== Entities ==
Metrics for Classes:
Precision: 1.0000
Recall: 0.7500
F1 Score: 0.8571
Total Elements: 12
Matched Elements: 9
Metrics for Object properties:
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000
Total Elements: 8
Matched Elements: 8
Metrics for Individuals:
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000
Total Elements: 1
Matched Elements: 1
Overall Metrics (Synthesis):
Precision: 1.0000
Recall: 0.8571
F1 Score: 0.9231
== Taxonomic relations ==
Metrics for Subclass:
Precision: 0.7000
Recall: 0.5833
F1 Score: 0.6364
Total Elements: 15
Matched Elements: 7
Metrics for Subproperty:
Precision: 1.0000
Recall: 0.8889
F1 Score: 0.9412
Total Elements: 9
Matched Elements: 8
Metrics for Instanciation:
Precision: 0.7500
Recall: 0.6000
F1 Score: 0.6667
Total Elements: 6
Matched Elements: 3
Overall Metrics (Synthesis):
Precision: 0.8182
Recall: 0.6923
F1 Score: 0.7500
== Non taxonomic relations ==
Metrics for Object properties:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Metrics for Data properties:
Precision: NA
Recall: NA
F1 Score: NA
Total Elements: 0
Matched Elements: 0
Overall Metrics (Synthesis):
Precision: NA
Recall: NA
F1 Score: NA
== Axioms ==
Metrics for Restriction axioms:
Precision: 1.0000
Recall: 0.6250
F1 Score: 0.7692
Total Elements: 8
Matched Elements: 5
Overall Metrics (Synthesis):
Precision: 1.0000
Recall: 0.6250
F1 Score: 0.7692
--------------------------------------------------------------------------------
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment