Project: Tetras MARS / tenet

Commit 06e7a5c6 (parent 634cc1c4), authored 2 years ago by David Rouquet

    Fix some bugs and clean code
Showing 2 changed files with 46 additions and 108 deletions:

  tenet/__init__.py   +0  −1
  tenet/main.py       +46 −107
tenet/__init__.py  +0 −1  (view file @ 06e7a5c6)

 # -- Update System Path
 import os, sys
 LIB_PATH = os.path.dirname(os.path.abspath(__file__)) + '/'
-print('Running in ' + LIB_PATH)
 os.chdir(LIB_PATH)
 sys.path.insert(0, os.path.abspath(LIB_PATH))
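The lines this diff keeps still run at import time, so merely importing `tenet` changes the process working directory and prepends the package directory to `sys.path`; only the informational print goes away. A tiny sketch to observe this (assuming the package is importable in the current environment):

```python
import os

before = os.getcwd()
import tenet  # module-level os.chdir(LIB_PATH) runs here

# The working directory is now the tenet package directory.
print(f'{before} -> {os.getcwd()}')
```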
tenet/main.py  +46 −107  (view file @ 06e7a5c6)
@@ -36,38 +36,10 @@ logger = logging.getLogger('root')
 def __set_context():
     # LIB_PATH = os.path.dirname(os.path.abspath(__file__)) + '/'
-    print(f'Running in {LIB_PATH}')
+    print(f'Tenet Running in {LIB_PATH}')
     os.chdir(LIB_PATH)
 
-# def __set_config(
-#         config_file_path,
-#         source_type, source_corpus, onto_prefix,
-#         base_output_dir, technical_dir_path):
-#     logger.info("-- Process Setting ")
-#     logger.info(f'----- Corpus source: {source_corpus} ({source_type})')
-#     logger.info(f'----- Base output dir: {base_output_dir}')
-#     logger.info(f'----- technical dir path: {technical_dir_path}')
-#     logger.info(f'----- Ontology target (id): {onto_prefix}')
-#     logger.info(f'----- Current path: {os.getcwd()}')
-#     logger.debug(f'----- Config file: {config_file_path}')
-#     process_config = config.Config(config_file_path,
-#                                    onto_prefix,
-#                                    source_corpus,
-#                                    base_output_dir = base_output_dir,
-#                                    technical_dir_path = technical_dir_path,
-#                                    source_type = source_type
-#                                    )
-#     #process_config.source_type = source_type
-#     # config.output_ontology_namespace = target_ontology_namespace
-#     logger.debug(process_config.get_full_config())
-#     return process_config
 
 def __set_config(config_dict):
     config_file_path = config_dict['config_file_path']
     source_type = config_dict['source_type']
@@ -84,14 +56,6 @@ def __set_config(config_dict):
     logger.info(f'----- Current path: {os.getcwd()}')
     logger.debug(f'----- Config file: {config_file_path}')
-    # process_config = config.Config(
-    #     config_file_path,
-    #     onto_prefix,
-    #     source_corpus,
-    #     base_output_dir=base_output_dir,
-    #     technical_dir_path=technical_dir_path,
-    #     source_type=source_type
-    # )
     base_config = config.Config(config_dict)
     logger.debug(base_config.get_full_config())
@@ -112,7 +76,6 @@ def __apply_extraction(config, sentence_file):
     if config.technical_dir_path is not None:
         os.makedirs(config.sentence_output_dir, exist_ok=True)
     work_graph = structure.prepare_sentence_work(config, sentence_file)
     _, new_triple_list = process.apply(config, work_graph)
-
     return new_triple_list
@@ -142,6 +105,21 @@ def __serialize_factoid_graph(config, factoid_graph, out_file_path=None):
     return ontology_turtle_string
 
+#==============================================================================
+# Extraction Run
+#==============================================================================
+
+def run_extraction(arg_dict):
+    process_config = config.Config(arg_dict)
+    sentence_indice = arg_dict['sentence_list_indice']
+    sentence_file = arg_dict['sentence_file']
+    logger.info(f' *** sentence {sentence_indice} *** ')
+    process_config.sentence_output_dir = f'-{sentence_indice}'
+    new_triple_list = __apply_extraction(process_config, sentence_file)
+    return (new_triple_list)
+
+
 #==============================================================================
 # AMR Main Methods (to create an ontology) - with one processing
 #==============================================================================
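The new `run_extraction` helper reads everything it needs from one plain dict, which is what lets the same worker serve both the sequential loop and the `multiprocessing.Pool.map` call later in this file (pool arguments must be picklable, and a dict of plain values is). A minimal self-contained sketch of that pattern; the worker body and the triple strings are illustrative stand-ins, not tenet's actual extraction logic:

```python
import multiprocessing

def run_one(arg_dict):
    # Stand-in for run_extraction: one picklable dict in, a triple list out.
    indice = arg_dict['sentence_list_indice']
    sentence_file = arg_dict['sentence_file']
    return [f'triple({sentence_file}, {indice})']

if __name__ == '__main__':
    tasks = [{'sentence_list_indice': i, 'sentence_file': f}
             for i, f in enumerate(['s0.ttl', 's1.ttl', 's2.ttl'])]

    # Sequential run, as in create_ontology_from_amrld_dir:
    result_triple_list = []
    for task in tasks:
        result_triple_list.extend(run_one(task))

    # Pooled run, as in create_ontology_from_amrld_dir_multi_cpu:
    with multiprocessing.Pool(2) as p:
        for triple_list in p.map(run_one, tasks):
            result_triple_list.extend(triple_list)

    print(result_triple_list)
```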
@@ -189,10 +167,6 @@ def create_ontology_from_amrld_file(amrld_file_path,
     config = __set_config(config_dict)
-    # config = __set_config(OWL_CONFIG_FILE_PATH,
-    #                       'amr', amrld_file_path, onto_prefix,
-    #                       base_output_dir, technical_dir_path)
     assert os.path.exists(amrld_file_path), f'input file does not exists ({amrld_file_path})'
 
     # -- Extraction Processing
@@ -259,23 +233,31 @@ def create_ontology_from_amrld_dir(amrld_dir_path,
     config = __set_config(config_dict)
-    # config = __set_config(OWL_CONFIG_FILE_PATH,
-    #                       'amr', amrld_dir_path, onto_prefix,
-    #                       base_output_dir, technical_dir_path)
     assert os.path.exists(amrld_dir_path), f'input directory does not exists ({amrld_dir_path})'
     __count_number_of_graph(config)
 
     # -- Extraction Processing
     logger.info('\n === Extraction Processing === ')
 
+    # ----- Sentence File List
     sentence_dir = config.source_sentence_file
-    sentence_count = 0
+    sentence_file_list = glob.glob(sentence_dir, recursive=True)
+
+    # ----- Computing Extraction Argument (config_dict update)
+    for i in range(len(sentence_file_list)):
+        config_dict['sentence_list_indice'] = i
+        config_dict['sentence_file'] = sentence_file_list[i]
+
+    # ----- Single Processing Extraction Run
+    #sentence_count = 0
     result_triple_list = []
-    for sentence_file in glob.glob(sentence_dir, recursive=True):
-        sentence_count += 1
-        logger.info(f' *** sentence {sentence_count} *** ')
-        config.sentence_output_dir = f'-{sentence_count}'
-        new_triple_list = __apply_extraction(config, sentence_file)
+    for sentence_file in sentence_file_list:
+        # sentence_count += 1
+        # logger.info(f' *** sentence {sentence_count} *** ')
+        # config.sentence_output_dir = f'-{sentence_count}'
+        # new_triple_list = __apply_extraction(config, sentence_file)
+        # result_triple_list.extend(new_triple_list)
+        new_triple_list = run_extraction(config_dict)
         result_triple_list.extend(new_triple_list)
 
     # -- Final Ontology Generation (factoid_graph)
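Note that the sentence list comes from `glob.glob(sentence_dir, recursive=True)`, so `config.source_sentence_file` is treated as a glob pattern rather than a literal directory path. A short sketch of that behaviour; the pattern below is a hypothetical example, not a path from this repository:

```python
import glob

# With recursive=True, '**' matches files at any directory depth.
pattern = 'technical-data/**/*.sentence.ttl'  # hypothetical pattern
for sentence_file in sorted(glob.glob(pattern, recursive=True)):
    print(sentence_file)
```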
@@ -297,35 +279,11 @@ def create_ontology_from_amrld_dir(amrld_dir_path,
 # AMR Main Methods (to create an ontology) - Multiprocessing
 #==============================================================================
 
-#global result_triple_queue
-#global sentence_file_list
-
-def dump_queue(q):
-    q.put(None)
-    return list(iter(q.get, None))
-
-def pool_function(arg_dict):
-    #global result_triple_queue
-    #global sentence_file_list
-    #process_config = config.Config(OWL_CONFIG_FILE_PATH, 'default', 'default')
-    #process_config.update_from_dict(arg_dict)
-    process_config = config.Config(arg_dict)
-    sentence_indice = arg_dict['sentence_list_indice']
-    sentence_file = sentence_file_list[sentence_indice]
-    logger.info(f' *** sentence {sentence_indice} *** ')
-    process_config.sentence_output_dir = f'-{sentence_indice}\n'
-    new_triple_list = __apply_extraction(process_config, sentence_file)
-    # The following must handled via a global queue
-    #result_triple_queue.extend(new_triple_list)
-    return (new_triple_list)
-
-#
 @timed
 def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
                                              base_ontology_path=None,
                                              onto_prefix=None,
@@ -352,8 +310,6 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
         Complete Ontology Turtle String (synthesis of all ontology)
     """
-    global result_triple_queue
-    global sentence_file_list
 
     logger.info('[TENET] Extraction Processing')
 
     # -- Process Initialization
@@ -373,37 +329,29 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
     config = __set_config(config_dict)
-    # config = __set_config(OWL_CONFIG_FILE_PATH,
-    #                       'amr', amrld_dir_path, onto_prefix,
-    #                       base_output_dir, technical_dir_path)
     assert os.path.exists(amrld_dir_path), f'input directory does not exists ({amrld_dir_path})'
     __count_number_of_graph(config)
 
     # -- Extraction Processing
     logger.info('\n === Extraction Processing === ')
-    sentence_dir = config.source_sentence_file
-    sentence_count = 0
-    result_triple_list = []
-    #result_triple_queue = multiprocessing.Queue()
 
+    # ----- Sentence File List
+    sentence_dir = config.source_sentence_file
     sentence_file_list = glob.glob(sentence_dir, recursive=True)
 
     # The following is for multiprocessing logging (must be exec before the pool is created
     multiprocessing_logging.install_mp_handler()
 
-    # config_dict = config.to_dict()
-    #star_iterable = [(i, config) for i in range(len(sentence_file_list))]
+    # ----- Computing Extraction Argument
     mapIterable = []
     for i in range(len(sentence_file_list)):
         config_dict['sentence_list_indice'] = i
+        config_dict['sentence_file'] = sentence_file_list[i]
         mapIterable = mapIterable + [config_dict.copy()]
 
+    # ----- (Multiprocessing) Extraction Run
     with multiprocessing.Pool(processes) as p:
-        triplesLists = p.map(pool_function, mapIterable)
+        triplesLists = p.map(run_extraction, mapIterable)
 
     result_triple_list = []
     for tripleList in triplesLists:
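Two details carry the weight in this hunk: `multiprocessing_logging.install_mp_handler()` is called before the pool is created (as the comment in the diff requires), and `config_dict.copy()` gives each map item its own snapshot of the shared dict; without the copy, every entry would alias the same object and all workers would see the last index. A condensed, self-contained sketch of the argument-building pattern, with a stand-in worker in place of `run_extraction`:

```python
import multiprocessing

def run_one(arg_dict):  # stand-in for run_extraction
    return [(arg_dict['sentence_list_indice'], arg_dict['sentence_file'])]

if __name__ == '__main__':
    config_dict = {'onto_prefix': 'demo'}  # shared settings (illustrative)
    sentence_file_list = ['a.ttl', 'b.ttl', 'c.ttl']

    mapIterable = []
    for i in range(len(sentence_file_list)):
        config_dict['sentence_list_indice'] = i
        config_dict['sentence_file'] = sentence_file_list[i]
        mapIterable.append(config_dict.copy())  # shallow copy freezes i for this task

    with multiprocessing.Pool(2) as p:
        triplesLists = p.map(run_one, mapIterable)
    print(triplesLists)  # [[(0, 'a.ttl')], [(1, 'b.ttl')], [(2, 'c.ttl')]]
```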
@@ -411,7 +359,6 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
     # -- Final Ontology Generation (factoid_graph)
     logger.info('\n === Final Ontology Generation === ')
-    #result_triple_list = dump_queue(result_triple_queue)
     factoid_graph = __generate_final_ontology(result_triple_list)
     ontology_turtle_string = __serialize_factoid_graph(config, factoid_graph, out_file_path)
@@ -470,10 +417,6 @@ def generate_odrl_from_amrld_file(
     config = __set_config(config_dict)
-    # config = __set_config(ODRL_CONFIG_FILE_PATH,
-    #                       'amr', amrld_file_path, onto_prefix,
-    #                       base_output_dir, technical_dir_path)
     assert os.path.exists(amrld_file_path), f'input file does not exists ({amrld_file_path})'
 
     # -- Extraction Processing
@@ -537,10 +480,6 @@ def generate_odrl_from_amrld_dir(
     config = __set_config(config_dict)
-    # config = __set_config(ODRL_CONFIG_FILE_PATH,
-    #                       'amr', amrld_dir_path, onto_prefix,
-    #                       base_output_dir, technical_dir_path)
     assert os.path.exists(amrld_dir_path), f'input directory does not exists ({amrld_dir_path})'
     __count_number_of_graph(config)