Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
Macao Legacy
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MACAO
Macao Legacy
Commits
e0e81c02
Commit
e0e81c02
authored
May 23, 2024
by
Eliott Sammier
Browse files
Options
Downloads
Patches
Plain Diff
Implement parsing of the manifest to generate RDF triples
parent
07c026de
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
Basilisk/MACAO/macao_12/extracted/extract.py
+78
-19
78 additions, 19 deletions
Basilisk/MACAO/macao_12/extracted/extract.py
with
78 additions
and
19 deletions
Basilisk/MACAO/macao_12/extracted/extract.py
+
78
−
19
View file @
e0e81c02
from
rdflib
import
Graph
,
Namespace
,
BNode
,
URIRef
from
pprint
import
pprint
from
rdflib
import
RDFS
,
Graph
,
Namespace
,
BNode
,
URIRef
,
Literal
from
rdflib.namespace
import
OWL
,
RDF
from
lxml
import
etree
SOURCE_DIR
=
"
..
"
OUTPUT_DIR
=
"
out
"
OUT_FILE
=
OUTPUT_DIR
+
"
/macao_content.ttl
"
SCHEMA_FILE
=
"
macao_schema.ttl
"
OUT_FILE
=
"
out/out.ttl
"
NAMESPACE
=
Namespace
(
"
http://www.semanticweb.org/eliott/ontologies/2024/4/macao/
"
)
NS
=
Namespace
(
"
http://www.semanticweb.org/eliott/ontologies/2024/4/macao/
"
)
def dump_graph(g: Graph):
    """
    Write every triple of the graph to standard output, one triple per line.
    """
    # Iterating over the graph yields (subject, predicate, object) tuples
    for triple in g:
        s, p, o = triple
        print(s, p, o)
def create_graph() -> Graph:
    """
    Build a new graph, bind the empty prefix to the project namespace and
    declare the `MosEtp129` named individual in it.
    """
    graph = Graph()
    graph.bind("", NAMESPACE)

    individual = NAMESPACE["MosEtp129"]
    graph.add((individual, RDF.type, OWL.NamedIndividual))

    # Bind default namespace to empty prefix
    graph.bind("", NS)
    return graph
...
...
@@ -28,20 +29,78 @@ def export_graph(g: Graph):
"""
imports
=
BNode
()
g
.
add
((
imports
,
RDF
.
type
,
OWL
.
Ontology
))
g
.
add
((
imports
,
OWL
.
imports
,
URIRef
(
NAMESPACE
)))
g
.
serialize
(
OUT_FILE
,
base
=
NAMESPACE
)
g
.
add
((
imports
,
OWL
.
imports
,
URIRef
(
NS
)))
g
.
serialize
(
OUT_FILE
,
'
turtle
'
,
base
=
NS
)
print
(
f
"
Exported
{
len
(
g
)
}
triples to
{
OUT_FILE
}
.
"
)
def
main
():
g
=
create_graph
()
# g.parse(SCHEMA_FILE)
g
.
parse
(
"
macao_contents.ttl
"
)
# dump_graph(g)
export_graph
(
g
)
# Print the number of triples in the Graph
print
(
f
"
Graph g has
{
len
(
g
)
}
statements.
"
)
def ns_find(elem: etree.ElementBase, query: str):
    """
    Wrapper for lxml's `find()` function that automatically uses the default
    namespace for all unprefixed tag names.

    :param elem: element to search from; its `nsmap` supplies the default
        namespace.
    :param query: path expression handed to `elem.find()`.
    :return: whatever `elem.find()` returns for `query`.
    """
    # `elem.nsmap[None]` raises KeyError when the document declares no
    # default namespace, so look it up defensively and fall back to a plain
    # (un-namespaced) search in that case.
    default_ns = elem.nsmap.get(None)
    namespaces = {"": default_ns} if default_ns is not None else None
    return elem.find(query, namespaces=namespaces)
def ns_findall(elem: etree.ElementBase, query: str):
    """
    Wrapper for lxml's `findall()` function that automatically uses the
    default namespace for all unprefixed tag names.

    :param elem: element to search from; its `nsmap` supplies the default
        namespace.
    :param query: path expression handed to `elem.findall()`.
    :return: whatever `elem.findall()` returns for `query`.
    """
    # `elem.nsmap[None]` raises KeyError when the document declares no
    # default namespace, so look it up defensively and fall back to a plain
    # (un-namespaced) search in that case.
    default_ns = elem.nsmap.get(None)
    namespaces = {"": default_ns} if default_ns is not None else None
    return elem.findall(query, namespaces=namespaces)
def ns_localname(elem: etree.ElementBase) -> str:
    """
    Return the local part of an element's tag name, i.e. the name without
    its namespace.
    """
    qualified_name = etree.QName(elem)
    return qualified_name.localname
def parse_manifest(graph: Graph):
    """
    Read `imsmanifest.xml` from `SOURCE_DIR` and fill `graph` with the
    modules hierarchy found under its first `organization` element.

    Every direct `item` child of the organization is parsed recursively and
    then attached to `MacaoRoot`.
    """
    # Parse with lxml
    manifest_path = SOURCE_DIR + "/imsmanifest.xml"
    document = etree.parse(manifest_path, None)
    organization = ns_find(document.getroot(), ".//organization")

    macao_root = NS["MacaoRoot"]
    for item in ns_findall(organization, "item"):
        identifier = item.get("identifier")
        print(ns_localname(item), identifier)
        parse_manifest_rec(graph, item)
        graph.add((NS[identifier], RDFS.subClassOf, macao_root))
def parse_manifest_rec(graph: Graph, elem, parentResource=None):
    """
    Add the manifest item `elem` to `graph`, then do the same for all of its
    descendants when the item is a module.

    An item whose identifier starts with `MosMod` is typed as a `Module` and
    its child `item` elements are visited recursively; any other item is
    typed as a `SousPartie` and is not descended into. When `parentResource`
    is given, the item is also linked to it.
    """
    # Title and identifier of the item
    label: str = ns_find(elem, "title").text
    identifier: str = elem.get("identifier")

    # Declare the RDF resource and its simple properties
    node = NS[identifier]
    common_properties = (
        (RDF.type, OWL.NamedIndividual),
        (NS["titre"], Literal(label)),
        (RDFS.label, Literal(label)),
    )
    for predicate, value in common_properties:
        graph.add((node, predicate, value))

    # Modules and subparts differ only in their class and in the property
    # that links them to their parent
    is_module = identifier.startswith("MosMod")
    if is_module:
        class_name, containment = "Module", "contientModule"
    else:
        class_name, containment = "SousPartie", "contientSousPartie"
    graph.add((node, RDF.type, NS[class_name]))

    # Link to the parent, if there is one
    if parentResource is not None:
        graph.add((parentResource, NS[containment], node))
        graph.add((node, RDFS.subClassOf, parentResource))

    # Only modules are descended into
    if is_module:
        for child in ns_findall(elem, "item"):
            parse_manifest_rec(graph, child, node)
def main():
    """
    Create the graph, populate it from the manifest and export it.
    """
    graph = create_graph()
    parse_manifest(graph)
    export_graph(graph)
if
__name__
==
"
__main__
"
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment