Skip to content
Snippets Groups Projects
Commit 997e1cbc authored by Eliott Sammier
Browse files

Reorganise code files and tooling

- Move all source files to `src/` dir to separate code and meta
- Add explicit linter settings, auto-format and recommended extensions
- Update README and CLI tool with new entrypoint
parent 5a7cc90d
Branches
No related tags found
No related merge requests found
...@@ -51,6 +51,8 @@ extract-rdf ...@@ -51,6 +51,8 @@ extract-rdf
Run the extractor to generate RDF from text sources Run the extractor to generate RDF from text sources
extract-mp3 extract-mp3
Extract audio streams from all Flash SWF files Extract audio streams from all Flash SWF files
convert
Run the full conversion process (extract -> transform -> export)
help help
Print this help and exit Print this help and exit
EOF EOF
...@@ -70,6 +72,13 @@ check_file() { ...@@ -70,6 +72,13 @@ check_file() {
return 1 return 1
} }
# Activate the project's Python virtual environment in the current shell.
# If the activation script under $SCRIPTS_DIR/venv cannot be sourced, print a
# hint on stderr and terminate the script with exit status 1.
activate_venv() {
    # Success path: leave the function as soon as the venv is active.
    source "$SCRIPTS_DIR/venv/bin/activate" && return
    echo "Python venv not found, did you run setup first?" >&2
    exit 1
}
# List MP3 streams in a file # List MP3 streams in a file
list_streams() { list_streams() {
ffprobe -i "$1" 2>&1 | grep -E 'Stream.*Audio: mp3' ffprobe -i "$1" 2>&1 | grep -E 'Stream.*Audio: mp3'
...@@ -206,13 +215,13 @@ count-all) ...@@ -206,13 +215,13 @@ count-all)
setup) setup)
"$SCRIPTS_DIR/setup.sh" "$SCRIPTS_DIR/setup.sh"
;; ;;
convert)
activate_venv
python "$SCRIPTS_DIR/src/main.py"
;;
extract-rdf) extract-rdf)
if source "$SCRIPTS_DIR/venv/bin/activate"; then activate_venv
python "$SCRIPTS_DIR/extract.py" python "$SCRIPTS_DIR/src/extract.py"
else
echo "Python venv not found, did you run setup first?" >&2
exit 1
fi
;; ;;
extract-mp3) extract-mp3)
for audio_file in "$SOURCES_DIR/contenu/media/"*.swf; do for audio_file in "$SOURCES_DIR/contenu/media/"*.swf; do
...@@ -221,10 +230,7 @@ extract-mp3) ...@@ -221,10 +230,7 @@ extract-mp3)
;; ;;
shell) shell)
if [[ "$1" = "-p" || "$1" = "--pyenv" ]]; then if [[ "$1" = "-p" || "$1" = "--pyenv" ]]; then
if ! source "$SCRIPTS_DIR/venv/bin/activate"; then activate_venv
echo "Python venv not found, did you run setup first?" >&2
exit 1
fi
fi fi
export PATH="$PATH:$MACAO_ROOT" export PATH="$PATH:$MACAO_ROOT"
cd "$MACAO_ROOT" cd "$MACAO_ROOT"
......
{ {
"recommendations": [ "recommendations": [
"detachhead.basedpyright", "detachhead.basedpyright",
"ms-python.black-formatter" "ms-python.black-formatter",
"ms-python.isort"
] ]
} }
\ No newline at end of file
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
"name": "Python Debugger: Current File", "name": "Python Debugger: Current File",
"type": "debugpy", "type": "debugpy",
"request": "launch", "request": "launch",
"program": "extract.py", "program": "src/extract.py",
"console": "integratedTerminal" "console": "integratedTerminal"
} }
] ]
......
{ {
"python.analysis.typeCheckingMode": "basic", "basedpyright.analysis.diagnosticMode": "workspace",
"python.analysis.autoImportCompletions": true "basedpyright.analysis.exclude": [
"venv/"
],
"basedpyright.disableOrganizeImports": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "always"
},
"editor.formatOnSave": true,
"python.analysis.autoImportCompletions": true,
"python.analysis.typeCheckingMode": "basic"
} }
\ No newline at end of file
...@@ -9,7 +9,7 @@ Ensuite pour chaque shell, il est nécessaire de `source venv/bin/activate` avan ...@@ -9,7 +9,7 @@ Ensuite pour chaque shell, il est nécessaire de `source venv/bin/activate` avan
de pouvoir lancer Python. de pouvoir lancer Python.
```sh ```sh
python extract.py python src/extract.py
``` ```
`extract.py` est le point d'entrée de l'extracteur, qui produit une représentation `extract.py` est le point d'entrée de l'extracteur, qui produit une représentation
RDF des contenus textuels extraits de Macao12. RDF des contenus textuels extraits de Macao12.
......
...@@ -3,8 +3,7 @@ from sys import stderr ...@@ -3,8 +3,7 @@ from sys import stderr
from typing import Any from typing import Any
from lxml import html from lxml import html
from rdflib import Graph, Literal, RDFS, URIRef from rdflib import RDFS, Graph, Literal, Namespace, URIRef
from rdflib import Namespace
def env_path_or_rel_default(env_var: str, default: str) -> str: def env_path_or_rel_default(env_var: str, default: str) -> str:
...@@ -20,15 +19,17 @@ def env_path_or_rel_default(env_var: str, default: str) -> str: ...@@ -20,15 +19,17 @@ def env_path_or_rel_default(env_var: str, default: str) -> str:
MODULE_DIR = path.dirname(path.realpath(__file__)) MODULE_DIR = path.dirname(path.realpath(__file__))
"""Absolute path of this module's directory""" """Absolute path of this module's directory"""
MACAO_ROOT = env_path_or_rel_default("MACAO_ROOT", "../../..") MACAO_ROOT = env_path_or_rel_default("MACAO_ROOT", "../../../..")
"""Path to the Macao root directory""" """Path to the Macao root directory"""
SOURCE_DIR = env_path_or_rel_default("SOURCES_DIR", "../../../Basilisk/MACAO/macao_12") SOURCE_DIR = env_path_or_rel_default(
"SOURCES_DIR", "../../../../Basilisk/MACAO/macao_12"
)
"""Path to the Macao source directory (i.e. the one with the manifest)""" """Path to the Macao source directory (i.e. the one with the manifest)"""
RESULT_DIR = env_path_or_rel_default("RESULTS_DIR", "../result") RESULT_DIR = env_path_or_rel_default("RESULTS_DIR", "../../result")
"""Path to the output directory""" """Path to the output directory"""
RESULT_FILE = env_path_or_rel_default("RESULT_FILE", "../result/macao_content.ttl") RESULT_FILE = env_path_or_rel_default("RESULT_FILE", RESULT_DIR + "/macao_content.ttl")
"""Path to the Turtle output file""" """Path to the Turtle output file"""
SCHEMA_FILE = env_path_or_rel_default("SCHEMA_FILE", "../macao_schema.ttl") SCHEMA_FILE = env_path_or_rel_default("SCHEMA_FILE", "../../macao_schema.ttl")
"""Path to the schema file""" """Path to the schema file"""
NS = Namespace("http://www.semanticweb.org/eliott/ontologies/2024/4/macao/") NS = Namespace("http://www.semanticweb.org/eliott/ontologies/2024/4/macao/")
......
...@@ -5,12 +5,16 @@ isomorphic, and their differences otherwise. ...@@ -5,12 +5,16 @@ isomorphic, and their differences otherwise.
Implemented from https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#module-rdflib.compare Implemented from https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#module-rdflib.compare
""" """
from sys import argv, stderr from sys import argv, stderr
from rdflib import Graph from rdflib import Graph
from rdflib.compare import to_isomorphic, graph_diff from rdflib.compare import graph_diff, to_isomorphic
def dump_nt_sorted(g: Graph): def dump_nt_sorted(g: Graph):
for l in sorted(g.serialize(format='nt').splitlines()): for l in sorted(g.serialize(format="nt").splitlines()):
if l: print("\t"+l) if l:
print("\t" + l)
def main(): def main():
if len(argv) < 3: if len(argv) < 3:
...@@ -33,5 +37,6 @@ def main(): ...@@ -33,5 +37,6 @@ def main():
print("In second:") print("In second:")
dump_nt_sorted(in_second) dump_nt_sorted(in_second)
if __name__ == "__main__": if __name__ == "__main__":
main() main()
from os import path
import re import re
import subprocess import subprocess
from os import path
from rdflib import Graph, Literal, OWL, RDF, RDFS from rdflib import OWL, RDF, RDFS, Graph, Literal
from common import * from common import *
from extract_page import parse_page from extract_page import parse_page
......
import re
from abc import abstractmethod from abc import abstractmethod
from dataclasses import dataclass from dataclasses import dataclass
import re
from typing import Any, Callable from typing import Any, Callable
import esprima as es import esprima as es
from lxml import etree, html from lxml import etree, html
from lxml.etree import _Element from lxml.etree import _Element
from lxml.html import HtmlElement from lxml.html import HtmlElement
from rdflib import Graph, Literal, RDF from rdflib import RDF, Graph, Literal
from typing_extensions import override from typing_extensions import override
from common import * from common import *
......
import extract
def main():
    """Run the conversion pipeline.

    Only the extraction stage is invoked for now; the transform and export
    stages are listed below but remain disabled.
    """
    extract.main()
    # Pending stages, kept disabled:
    # transform.main()
    # export.main()
if __name__ == "__main__":
main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment