From 116f46a79acc27c1e78e0879a1b69f6412457912 Mon Sep 17 00:00:00 2001
From: Aurélien Lamercerie <aurelien.lamercerie@tetras-libre.fr>
Date: Thu, 7 Apr 2022 19:38:57 +0200
Subject: [PATCH] Basic document parsing (with org/unl part separation)

---
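Notes:

The doc grammar splits each sentence block of the input document into its
original-language part ({org:en} ... {/org}) and its UNL part
({unl} ... {/unl}); the embedded actions build the data classes defined in
asd/doc.py. A minimal round-trip sketch, assuming the lexer and parser have
already been generated into grammar/doc/ (for instance by a previous run of
parse.py, or with "antlr4 -Dlanguage=Python3 grammar/doc/doc.g4" from the
repository root):

    from antlr4 import InputStream, CommonTokenStream
    from grammar.doc.docLexer import docLexer
    from grammar.doc.docParser import docParser

    text = "[D] [S:R1] {org:en} Hello. {/org} {unl} aoj(a,b) {/unl} [/S] [/D]"
    parser = docParser(CommonTokenStream(docLexer(InputStream(text))))
    document = parser.document().out    # built by the grammar actions
    print(document.to_string())         # org part then unl part, tags included

The usual entry point is "python3 parse.py input.txt", which performs the
same steps on an input file (regenerating the lexer and parser first).
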
 asd/doc.py                                    |  64 +++++
 grammar/doc/doc.g4                            |  49 ++++
 grammar/doc/doc.interp                        |  29 ++
 grammar/doc/doc.tokens                        |  11 +
 grammar/doc/docLexer.interp                   |  38 +++
 grammar/doc/docLexer.py                       |  83 ++++++
 grammar/doc/docLexer.tokens                   |  11 +
 grammar/doc/docListener.py                    |  51 ++++
 grammar/doc/docParser.py                      | 264 ++++++++++++++++++
 grammar/org/org.g4                            |  30 ++
 grammar/unl/unl.g4                            |  30 ++
 input.txt                                     |  10 +
 parse.py                                      | 118 ++++++++
 requirements.txt                              |   1 +
 14 files changed, 789 insertions(+)
 create mode 100644 asd/doc.py
 create mode 100644 grammar/doc/doc.g4
 create mode 100644 grammar/doc/doc.interp
 create mode 100644 grammar/doc/doc.tokens
 create mode 100644 grammar/doc/docLexer.interp
 create mode 100644 grammar/doc/docLexer.py
 create mode 100644 grammar/doc/docLexer.tokens
 create mode 100644 grammar/doc/docListener.py
 create mode 100644 grammar/doc/docParser.py
 create mode 100644 grammar/org/org.g4
 create mode 100644 grammar/unl/unl.g4
 create mode 100644 input.txt
 create mode 100644 parse.py
 create mode 100644 requirements.txt

diff --git a/asd/doc.py b/asd/doc.py
new file mode 100644
index 0000000..bbfdebb
--- /dev/null
+++ b/asd/doc.py
@@ -0,0 +1,64 @@
+#!/usr/bin/python3.10
+# -*- coding: utf-8 -*-
+
+#==============================================================================
+# TENET: document structure
+#------------------------------------------------------------------------------
+# Module defining the document structure classes (Document, Sentence,
+# OrgPart, UnlPart) built by the parser actions during document parsing
+#==============================================================================
+
+#==============================================================================
+# Importing required modules
+#==============================================================================
+
+# None (this module defines plain data classes only; the ANTLR runtime is
+# needed by the generated lexer/parser, not here)
+
+
+#==============================================================================
+# Parameters
+#==============================================================================
+
+# None
+
+
+#==============================================================================
+# Document Class
+#==============================================================================
+
+class Document:
+    
+    def __init__(self, sentence):
+        self.sentence = sentence
+    
+    def to_string(self):
+        return self.sentence.to_string()
+    
+
+class Sentence:
+    
+    def __init__(self, org_part, unl_part):
+        self.org_part = org_part
+        self.unl_part = unl_part  
+    
+    def to_string(self):
+        return self.org_part.to_string() + self.unl_part.to_string()
+    
+    
+class OrgPart:
+    
+    def __init__(self, value):
+        self.value = value     
+    
+    def to_string(self):
+        return self.value
+    
+    
+class UnlPart:
+    
+    def __init__(self, value):
+        self.value = value    
+    
+    def to_string(self):
+        return self.value
\ No newline at end of file
diff --git a/grammar/doc/doc.g4 b/grammar/doc/doc.g4
new file mode 100644
index 0000000..7912b9c
--- /dev/null
+++ b/grammar/doc/doc.g4
@@ -0,0 +1,49 @@
+//=============================================================================
+// ANTLR Grammar for UNL Document
+//=============================================================================
+
+grammar doc;
+
+@header {
+from asd import doc
+}
+
+
+//=============================================================================
+// Parser Grammar
+//=============================================================================
+
+//---------------------------------------------------------
+// Document = list of sentence
+//---------------------------------------------------------
+
+document returns [out]
+  : '[D]' s=sentence '[/D]'               {$out = doc.Document($s.out)}
+  ;
+
+sentence returns [out]
+  : '[S:R1]' o=orgPart u=unlPart '[/S]'   {$out = doc.Sentence($o.out, $u.out)}
+  ;
+
+orgPart returns [out]
+  : o=ORG                                 {$out = doc.OrgPart($o.text)}
+  ;
+
+unlPart returns [out]
+  : u=UNL                                 {$out = doc.UnlPart($u.text)}
+  ;
+
+
+//=============================================================================
+// Lexer Grammar
+//=============================================================================
+
+// ignore whitespaces
+WS              : (' '|'\n'|'\t'|'\r'|'\u000C')+ -> skip ;
+
+// other tokens
+ORG             : '{org:en}' (.)* '{/org}' ;
+UNL             : '{unl}' (.)* '{/unl}' ;
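+// note: '(.)*' matches greedily; with several org/unl blocks per document,
+// the non-greedy '(.)*?' would be needed to stop at the first closing tag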
+
diff --git a/grammar/doc/doc.interp b/grammar/doc/doc.interp
new file mode 100644
index 0000000..69c2f70
--- /dev/null
+++ b/grammar/doc/doc.interp
@@ -0,0 +1,29 @@
+token literal names:
+null
+'[D]'
+'[/D]'
+'[S:R1]'
+'[/S]'
+null
+null
+null
+
+token symbolic names:
+null
+null
+null
+null
+null
+WS
+ORG
+UNL
+
+rule names:
+document
+sentence
+orgPart
+unlPart
+
+
+atn:
+[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 3, 9, 28, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 3, 5, 3, 5, 3, 5, 3, 5, 2, 2, 6, 2, 4, 6, 8, 2, 2, 2, 23, 2, 10, 3, 2, 2, 2, 4, 15, 3, 2, 2, 2, 6, 21, 3, 2, 2, 2, 8, 24, 3, 2, 2, 2, 10, 11, 7, 3, 2, 2, 11, 12, 5, 4, 3, 2, 12, 13, 7, 4, 2, 2, 13, 14, 8, 2, 1, 2, 14, 3, 3, 2, 2, 2, 15, 16, 7, 5, 2, 2, 16, 17, 5, 6, 4, 2, 17, 18, 5, 8, 5, 2, 18, 19, 7, 6, 2, 2, 19, 20, 8, 3, 1, 2, 20, 5, 3, 2, 2, 2, 21, 22, 7, 8, 2, 2, 22, 23, 8, 4, 1, 2, 23, 7, 3, 2, 2, 2, 24, 25, 7, 9, 2, 2, 25, 26, 8, 5, 1, 2, 26, 9, 3, 2, 2, 2, 2]
\ No newline at end of file
diff --git a/grammar/doc/doc.tokens b/grammar/doc/doc.tokens
new file mode 100644
index 0000000..dc0d799
--- /dev/null
+++ b/grammar/doc/doc.tokens
@@ -0,0 +1,11 @@
+T__0=1
+T__1=2
+T__2=3
+T__3=4
+WS=5
+ORG=6
+UNL=7
+'[D]'=1
+'[/D]'=2
+'[S:R1]'=3
+'[/S]'=4
diff --git a/grammar/doc/docLexer.interp b/grammar/doc/docLexer.interp
new file mode 100644
index 0000000..eb836f7
--- /dev/null
+++ b/grammar/doc/docLexer.interp
@@ -0,0 +1,38 @@
+token literal names:
+null
+'[D]'
+'[/D]'
+'[S:R1]'
+'[/S]'
+null
+null
+null
+
+token symbolic names:
+null
+null
+null
+null
+null
+WS
+ORG
+UNL
+
+rule names:
+T__0
+T__1
+T__2
+T__3
+WS
+ORG
+UNL
+
+channel names:
+DEFAULT_TOKEN_CHANNEL
+HIDDEN
+
+mode names:
+DEFAULT_MODE
+
+atn:
+[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 9, 86, 8, 1, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8, 3, 2, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 6, 6, 6, 40, 10, 6, 13, 6, 14, 6, 41, 3, 6, 3, 6, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 7, 7, 56, 10, 7, 12, 7, 14, 7, 59, 11, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 7, 8, 75, 10, 8, 12, 8, 14, 8, 78, 11, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 2, 2, 9, 3, 3, 5, 4, 7, 5, 9, 6, 11, 7, 13, 8, 15, 9, 3, 2, 3, 5, 2, 11, 12, 14, 15, 34, 34, 2, 88, 2, 3, 3, 2, 2, 2, 2, 5, 3, 2, 2, 2, 2, 7, 3, 2, 2, 2, 2, 9, 3, 2, 2, 2, 2, 11, 3, 2, 2, 2, 2, 13, 3, 2, 2, 2, 2, 15, 3, 2, 2, 2, 3, 17, 3, 2, 2, 2, 5, 21, 3, 2, 2, 2, 7, 26, 3, 2, 2, 2, 9, 33, 3, 2, 2, 2, 11, 39, 3, 2, 2, 2, 13, 45, 3, 2, 2, 2, 15, 67, 3, 2, 2, 2, 17, 18, 7, 93, 2, 2, 18, 19, 7, 70, 2, 2, 19, 20, 7, 95, 2, 2, 20, 4, 3, 2, 2, 2, 21, 22, 7, 93, 2, 2, 22, 23, 7, 49, 2, 2, 23, 24, 7, 70, 2, 2, 24, 25, 7, 95, 2, 2, 25, 6, 3, 2, 2, 2, 26, 27, 7, 93, 2, 2, 27, 28, 7, 85, 2, 2, 28, 29, 7, 60, 2, 2, 29, 30, 7, 84, 2, 2, 30, 31, 7, 51, 2, 2, 31, 32, 7, 95, 2, 2, 32, 8, 3, 2, 2, 2, 33, 34, 7, 93, 2, 2, 34, 35, 7, 49, 2, 2, 35, 36, 7, 85, 2, 2, 36, 37, 7, 95, 2, 2, 37, 10, 3, 2, 2, 2, 38, 40, 9, 2, 2, 2, 39, 38, 3, 2, 2, 2, 40, 41, 3, 2, 2, 2, 41, 39, 3, 2, 2, 2, 41, 42, 3, 2, 2, 2, 42, 43, 3, 2, 2, 2, 43, 44, 8, 6, 2, 2, 44, 12, 3, 2, 2, 2, 45, 46, 7, 125, 2, 2, 46, 47, 7, 113, 2, 2, 47, 48, 7, 116, 2, 2, 48, 49, 7, 105, 2, 2, 49, 50, 7, 60, 2, 2, 50, 51, 7, 103, 2, 2, 51, 52, 7, 112, 2, 2, 52, 53, 7, 127, 2, 2, 53, 57, 3, 2, 2, 2, 54, 56, 11, 2, 2, 2, 55, 54, 3, 2, 2, 2, 56, 59, 3, 2, 2, 2, 57, 55, 3, 2, 2, 2, 57, 58, 3, 2, 2, 2, 58, 60, 3, 2, 2, 2, 59, 57, 3, 2, 2, 2, 60, 61, 7, 125, 2, 2, 61, 62, 7, 49, 2, 2, 62, 63, 7, 113, 2, 2, 63, 64, 7, 116, 2, 2, 64, 65, 7, 105, 2, 2, 65, 66, 7, 127, 2, 2, 66, 14, 3, 2, 2, 2, 67, 68, 7, 125, 2, 2, 68, 69, 7, 119, 2, 2, 69, 70, 7, 112, 2, 2, 70, 71, 7, 110, 2, 2, 71, 72, 7, 127, 2, 2, 72, 76, 3, 2, 2, 2, 73, 75, 11, 2, 2, 2, 74, 73, 3, 2, 2, 2, 75, 78, 3, 2, 2, 2, 76, 74, 3, 2, 2, 2, 76, 77, 3, 2, 2, 2, 77, 79, 3, 2, 2, 2, 78, 76, 3, 2, 2, 2, 79, 80, 7, 125, 2, 2, 80, 81, 7, 49, 2, 2, 81, 82, 7, 119, 2, 2, 82, 83, 7, 112, 2, 2, 83, 84, 7, 110, 2, 2, 84, 85, 7, 127, 2, 2, 85, 16, 3, 2, 2, 2, 6, 2, 41, 57, 76, 3, 8, 2, 2]
\ No newline at end of file
diff --git a/grammar/doc/docLexer.py b/grammar/doc/docLexer.py
new file mode 100644
index 0000000..ffe37e2
--- /dev/null
+++ b/grammar/doc/docLexer.py
@@ -0,0 +1,83 @@
+# Generated from grammar/doc/doc.g4 by ANTLR 4.9.3
+from antlr4 import *
+from io import StringIO
+import sys
+if sys.version_info[1] > 5:
+    from typing import TextIO
+else:
+    from typing.io import TextIO
+
+
+from asd import doc
+
+
+
+def serializedATN():
+    with StringIO() as buf:
+        buf.write("\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\2\t")
+        buf.write("V\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7")
+        buf.write("\4\b\t\b\3\2\3\2\3\2\3\2\3\3\3\3\3\3\3\3\3\3\3\4\3\4\3")
+        buf.write("\4\3\4\3\4\3\4\3\4\3\5\3\5\3\5\3\5\3\5\3\6\6\6(\n\6\r")
+        buf.write("\6\16\6)\3\6\3\6\3\7\3\7\3\7\3\7\3\7\3\7\3\7\3\7\3\7\3")
+        buf.write("\7\7\78\n\7\f\7\16\7;\13\7\3\7\3\7\3\7\3\7\3\7\3\7\3\7")
+        buf.write("\3\b\3\b\3\b\3\b\3\b\3\b\3\b\7\bK\n\b\f\b\16\bN\13\b\3")
+        buf.write("\b\3\b\3\b\3\b\3\b\3\b\3\b\2\2\t\3\3\5\4\7\5\t\6\13\7")
+        buf.write("\r\b\17\t\3\2\3\5\2\13\f\16\17\"\"\2X\2\3\3\2\2\2\2\5")
+        buf.write("\3\2\2\2\2\7\3\2\2\2\2\t\3\2\2\2\2\13\3\2\2\2\2\r\3\2")
+        buf.write("\2\2\2\17\3\2\2\2\3\21\3\2\2\2\5\25\3\2\2\2\7\32\3\2\2")
+        buf.write("\2\t!\3\2\2\2\13\'\3\2\2\2\r-\3\2\2\2\17C\3\2\2\2\21\22")
+        buf.write("\7]\2\2\22\23\7F\2\2\23\24\7_\2\2\24\4\3\2\2\2\25\26\7")
+        buf.write("]\2\2\26\27\7\61\2\2\27\30\7F\2\2\30\31\7_\2\2\31\6\3")
+        buf.write("\2\2\2\32\33\7]\2\2\33\34\7U\2\2\34\35\7<\2\2\35\36\7")
+        buf.write("T\2\2\36\37\7\63\2\2\37 \7_\2\2 \b\3\2\2\2!\"\7]\2\2\"")
+        buf.write("#\7\61\2\2#$\7U\2\2$%\7_\2\2%\n\3\2\2\2&(\t\2\2\2\'&\3")
+        buf.write("\2\2\2()\3\2\2\2)\'\3\2\2\2)*\3\2\2\2*+\3\2\2\2+,\b\6")
+        buf.write("\2\2,\f\3\2\2\2-.\7}\2\2./\7q\2\2/\60\7t\2\2\60\61\7i")
+        buf.write("\2\2\61\62\7<\2\2\62\63\7g\2\2\63\64\7p\2\2\64\65\7\177")
+        buf.write("\2\2\659\3\2\2\2\668\13\2\2\2\67\66\3\2\2\28;\3\2\2\2")
+        buf.write("9\67\3\2\2\29:\3\2\2\2:<\3\2\2\2;9\3\2\2\2<=\7}\2\2=>")
+        buf.write("\7\61\2\2>?\7q\2\2?@\7t\2\2@A\7i\2\2AB\7\177\2\2B\16\3")
+        buf.write("\2\2\2CD\7}\2\2DE\7w\2\2EF\7p\2\2FG\7n\2\2GH\7\177\2\2")
+        buf.write("HL\3\2\2\2IK\13\2\2\2JI\3\2\2\2KN\3\2\2\2LJ\3\2\2\2LM")
+        buf.write("\3\2\2\2MO\3\2\2\2NL\3\2\2\2OP\7}\2\2PQ\7\61\2\2QR\7w")
+        buf.write("\2\2RS\7p\2\2ST\7n\2\2TU\7\177\2\2U\20\3\2\2\2\6\2)9L")
+        buf.write("\3\b\2\2")
+        return buf.getvalue()
+
+
+class docLexer(Lexer):
+
+    atn = ATNDeserializer().deserialize(serializedATN())
+
+    decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ]
+
+    T__0 = 1
+    T__1 = 2
+    T__2 = 3
+    T__3 = 4
+    WS = 5
+    ORG = 6
+    UNL = 7
+
+    channelNames = [ u"DEFAULT_TOKEN_CHANNEL", u"HIDDEN" ]
+
+    modeNames = [ "DEFAULT_MODE" ]
+
+    literalNames = [ "<INVALID>",
+            "'[D]'", "'[/D]'", "'[S:R1]'", "'[/S]'" ]
+
+    symbolicNames = [ "<INVALID>",
+            "WS", "ORG", "UNL" ]
+
+    ruleNames = [ "T__0", "T__1", "T__2", "T__3", "WS", "ORG", "UNL" ]
+
+    grammarFileName = "doc.g4"
+
+    def __init__(self, input=None, output:TextIO = sys.stdout):
+        super().__init__(input, output)
+        self.checkVersion("4.9.3")
+        self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache())
+        self._actions = None
+        self._predicates = None
+
+
diff --git a/grammar/doc/docLexer.tokens b/grammar/doc/docLexer.tokens
new file mode 100644
index 0000000..dc0d799
--- /dev/null
+++ b/grammar/doc/docLexer.tokens
@@ -0,0 +1,11 @@
+T__0=1
+T__1=2
+T__2=3
+T__3=4
+WS=5
+ORG=6
+UNL=7
+'[D]'=1
+'[/D]'=2
+'[S:R1]'=3
+'[/S]'=4
diff --git a/grammar/doc/docListener.py b/grammar/doc/docListener.py
new file mode 100644
index 0000000..b71589a
--- /dev/null
+++ b/grammar/doc/docListener.py
@@ -0,0 +1,51 @@
+# Generated from grammar/doc/doc.g4 by ANTLR 4.9.3
+from antlr4 import *
+if __name__ is not None and "." in __name__:
+    from .docParser import docParser
+else:
+    from docParser import docParser
+
+from asd import doc
+
+
+# This class defines a complete listener for a parse tree produced by docParser.
+class docListener(ParseTreeListener):
+
+    # Enter a parse tree produced by docParser#document.
+    def enterDocument(self, ctx:docParser.DocumentContext):
+        pass
+
+    # Exit a parse tree produced by docParser#document.
+    def exitDocument(self, ctx:docParser.DocumentContext):
+        pass
+
+
+    # Enter a parse tree produced by docParser#sentence.
+    def enterSentence(self, ctx:docParser.SentenceContext):
+        pass
+
+    # Exit a parse tree produced by docParser#sentence.
+    def exitSentence(self, ctx:docParser.SentenceContext):
+        pass
+
+
+    # Enter a parse tree produced by docParser#orgPart.
+    def enterOrgPart(self, ctx:docParser.OrgPartContext):
+        pass
+
+    # Exit a parse tree produced by docParser#orgPart.
+    def exitOrgPart(self, ctx:docParser.OrgPartContext):
+        pass
+
+
+    # Enter a parse tree produced by docParser#unlPart.
+    def enterUnlPart(self, ctx:docParser.UnlPartContext):
+        pass
+
+    # Exit a parse tree produced by docParser#unlPart.
+    def exitUnlPart(self, ctx:docParser.UnlPartContext):
+        pass
+
+
+
+del docParser
\ No newline at end of file
diff --git a/grammar/doc/docParser.py b/grammar/doc/docParser.py
new file mode 100644
index 0000000..ab5ba95
--- /dev/null
+++ b/grammar/doc/docParser.py
@@ -0,0 +1,264 @@
+# Generated from grammar/doc/doc.g4 by ANTLR 4.9.3
+# encoding: utf-8
+from antlr4 import *
+from io import StringIO
+import sys
+if sys.version_info[1] > 5:
+	from typing import TextIO
+else:
+	from typing.io import TextIO
+
+
+from asd import doc
+
+
+def serializedATN():
+    with StringIO() as buf:
+        buf.write("\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\3\t")
+        buf.write("\34\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\3\2\3\2\3\2\3\2\3")
+        buf.write("\2\3\3\3\3\3\3\3\3\3\3\3\3\3\4\3\4\3\4\3\5\3\5\3\5\3\5")
+        buf.write("\2\2\6\2\4\6\b\2\2\2\27\2\n\3\2\2\2\4\17\3\2\2\2\6\25")
+        buf.write("\3\2\2\2\b\30\3\2\2\2\n\13\7\3\2\2\13\f\5\4\3\2\f\r\7")
+        buf.write("\4\2\2\r\16\b\2\1\2\16\3\3\2\2\2\17\20\7\5\2\2\20\21\5")
+        buf.write("\6\4\2\21\22\5\b\5\2\22\23\7\6\2\2\23\24\b\3\1\2\24\5")
+        buf.write("\3\2\2\2\25\26\7\b\2\2\26\27\b\4\1\2\27\7\3\2\2\2\30\31")
+        buf.write("\7\t\2\2\31\32\b\5\1\2\32\t\3\2\2\2\2")
+        return buf.getvalue()
+
+
+class docParser ( Parser ):
+
+    grammarFileName = "doc.g4"
+
+    atn = ATNDeserializer().deserialize(serializedATN())
+
+    decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ]
+
+    sharedContextCache = PredictionContextCache()
+
+    literalNames = [ "<INVALID>", "'[D]'", "'[/D]'", "'[S:R1]'", "'[/S]'" ]
+
+    symbolicNames = [ "<INVALID>", "<INVALID>", "<INVALID>", "<INVALID>", 
+                      "<INVALID>", "WS", "ORG", "UNL" ]
+
+    RULE_document = 0
+    RULE_sentence = 1
+    RULE_orgPart = 2
+    RULE_unlPart = 3
+
+    ruleNames =  [ "document", "sentence", "orgPart", "unlPart" ]
+
+    EOF = Token.EOF
+    T__0=1
+    T__1=2
+    T__2=3
+    T__3=4
+    WS=5
+    ORG=6
+    UNL=7
+
+    def __init__(self, input:TokenStream, output:TextIO = sys.stdout):
+        super().__init__(input, output)
+        self.checkVersion("4.9.3")
+        self._interp = ParserATNSimulator(self, self.atn, self.decisionsToDFA, self.sharedContextCache)
+        self._predicates = None
+
+
+
+
+    class DocumentContext(ParserRuleContext):
+        __slots__ = 'parser'
+
+        def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1):
+            super().__init__(parent, invokingState)
+            self.parser = parser
+            self.out = None
+            self.s = None # SentenceContext
+
+        def sentence(self):
+            return self.getTypedRuleContext(docParser.SentenceContext,0)
+
+
+        def getRuleIndex(self):
+            return docParser.RULE_document
+
+        def enterRule(self, listener:ParseTreeListener):
+            if hasattr( listener, "enterDocument" ):
+                listener.enterDocument(self)
+
+        def exitRule(self, listener:ParseTreeListener):
+            if hasattr( listener, "exitDocument" ):
+                listener.exitDocument(self)
+
+
+
+
+    def document(self):
+
+        localctx = docParser.DocumentContext(self, self._ctx, self.state)
+        self.enterRule(localctx, 0, self.RULE_document)
+        try:
+            self.enterOuterAlt(localctx, 1)
+            self.state = 8
+            self.match(docParser.T__0)
+            self.state = 9
+            localctx.s = self.sentence()
+            self.state = 10
+            self.match(docParser.T__1)
+            localctx.out = doc.Document(localctx.s.out)
+        except RecognitionException as re:
+            localctx.exception = re
+            self._errHandler.reportError(self, re)
+            self._errHandler.recover(self, re)
+        finally:
+            self.exitRule()
+        return localctx
+
+
+    class SentenceContext(ParserRuleContext):
+        __slots__ = 'parser'
+
+        def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1):
+            super().__init__(parent, invokingState)
+            self.parser = parser
+            self.out = None
+            self.o = None # OrgPartContext
+            self.u = None # UnlPartContext
+
+        def orgPart(self):
+            return self.getTypedRuleContext(docParser.OrgPartContext,0)
+
+
+        def unlPart(self):
+            return self.getTypedRuleContext(docParser.UnlPartContext,0)
+
+
+        def getRuleIndex(self):
+            return docParser.RULE_sentence
+
+        def enterRule(self, listener:ParseTreeListener):
+            if hasattr( listener, "enterSentence" ):
+                listener.enterSentence(self)
+
+        def exitRule(self, listener:ParseTreeListener):
+            if hasattr( listener, "exitSentence" ):
+                listener.exitSentence(self)
+
+
+
+
+    def sentence(self):
+
+        localctx = docParser.SentenceContext(self, self._ctx, self.state)
+        self.enterRule(localctx, 2, self.RULE_sentence)
+        try:
+            self.enterOuterAlt(localctx, 1)
+            self.state = 13
+            self.match(docParser.T__2)
+            self.state = 14
+            localctx.o = self.orgPart()
+            self.state = 15
+            localctx.u = self.unlPart()
+            self.state = 16
+            self.match(docParser.T__3)
+            localctx.out = doc.Sentence(localctx.o.out, localctx.u.out)
+        except RecognitionException as re:
+            localctx.exception = re
+            self._errHandler.reportError(self, re)
+            self._errHandler.recover(self, re)
+        finally:
+            self.exitRule()
+        return localctx
+
+
+    class OrgPartContext(ParserRuleContext):
+        __slots__ = 'parser'
+
+        def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1):
+            super().__init__(parent, invokingState)
+            self.parser = parser
+            self.out = None
+            self.o = None # Token
+
+        def ORG(self):
+            return self.getToken(docParser.ORG, 0)
+
+        def getRuleIndex(self):
+            return docParser.RULE_orgPart
+
+        def enterRule(self, listener:ParseTreeListener):
+            if hasattr( listener, "enterOrgPart" ):
+                listener.enterOrgPart(self)
+
+        def exitRule(self, listener:ParseTreeListener):
+            if hasattr( listener, "exitOrgPart" ):
+                listener.exitOrgPart(self)
+
+
+
+
+    def orgPart(self):
+
+        localctx = docParser.OrgPartContext(self, self._ctx, self.state)
+        self.enterRule(localctx, 4, self.RULE_orgPart)
+        try:
+            self.enterOuterAlt(localctx, 1)
+            self.state = 19
+            localctx.o = self.match(docParser.ORG)
+            localctx.out = doc.OrgPart((None if localctx.o is None else localctx.o.text))
+        except RecognitionException as re:
+            localctx.exception = re
+            self._errHandler.reportError(self, re)
+            self._errHandler.recover(self, re)
+        finally:
+            self.exitRule()
+        return localctx
+
+
+    class UnlPartContext(ParserRuleContext):
+        __slots__ = 'parser'
+
+        def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1):
+            super().__init__(parent, invokingState)
+            self.parser = parser
+            self.out = None
+            self.u = None # Token
+
+        def UNL(self):
+            return self.getToken(docParser.UNL, 0)
+
+        def getRuleIndex(self):
+            return docParser.RULE_unlPart
+
+        def enterRule(self, listener:ParseTreeListener):
+            if hasattr( listener, "enterUnlPart" ):
+                listener.enterUnlPart(self)
+
+        def exitRule(self, listener:ParseTreeListener):
+            if hasattr( listener, "exitUnlPart" ):
+                listener.exitUnlPart(self)
+
+
+
+
+    def unlPart(self):
+
+        localctx = docParser.UnlPartContext(self, self._ctx, self.state)
+        self.enterRule(localctx, 6, self.RULE_unlPart)
+        try:
+            self.enterOuterAlt(localctx, 1)
+            self.state = 22
+            localctx.u = self.match(docParser.UNL)
+            localctx.out = doc.UnlPart((None if localctx.u is None else localctx.u.text))
+        except RecognitionException as re:
+            localctx.exception = re
+            self._errHandler.reportError(self, re)
+            self._errHandler.recover(self, re)
+        finally:
+            self.exitRule()
+        return localctx
+
+
+
+
+
diff --git a/grammar/org/org.g4 b/grammar/org/org.g4
new file mode 100644
index 0000000..729dcd8
--- /dev/null
+++ b/grammar/org/org.g4
@@ -0,0 +1,30 @@
+//=============================================================================
+// ANTLR Grammar for the org part (original sentence) of a UNL document
+//=============================================================================
+
+grammar org;
+
+
+//=============================================================================
+// Parser Grammar
+//=============================================================================
+
+//---------------------------------------------------------
+// Origin NL sentence
+//---------------------------------------------------------
+
+orgPart
+  : ORG
+  ;
+
+
+//=============================================================================
+// Lexer Grammar
+//=============================================================================
+
+// ignore whitespaces
+WS              : (' '|'\n'|'\t'|'\r'|'\u000C')+ -> skip ;
+
+// other tokens
+ORG             : '{org:en}' (.)* '{/org}' ;
+
diff --git a/grammar/unl/unl.g4 b/grammar/unl/unl.g4
new file mode 100644
index 0000000..dac12c8
--- /dev/null
+++ b/grammar/unl/unl.g4
@@ -0,0 +1,30 @@
+//=============================================================================
+// ANTLR Grammar for the unl part of a UNL document
+//=============================================================================
+
+grammar unl;
+
+
+//=============================================================================
+// Parser Grammar
+//=============================================================================
+
+//---------------------------------------------------------
+// UNL representation
+//---------------------------------------------------------
+
+unlPart
+  : UNL
+  ;
+
+
+//=============================================================================
+// Lexer Grammar
+//=============================================================================
+
+// ignore whitespaces
+WS              : (' '|'\n'|'\t'|'\r'|'\u000C')+ -> skip ;
+
+// other tokens
+ORG             : '{org:en}' (.)* '{/org}' ;
+UNL             : '{unl}' (.)* '{/unl}' ;
diff --git a/input.txt b/input.txt
new file mode 100644
index 0000000..ed7718c
--- /dev/null
+++ b/input.txt
@@ -0,0 +1,10 @@
+[D]
+[S:R1]
+{org:en}
+The system allows a radio channel to take on two states: Listening Traffic and.
+{/org}
+{unl}
+aoj( a, b )
+{/unl}
+[/S]
+[/D]
diff --git a/parse.py b/parse.py
new file mode 100644
index 0000000..fa573f0
--- /dev/null
+++ b/parse.py
@@ -0,0 +1,118 @@
+#!/usr/bin/python3.10
+# -*- coding: utf-8 -*-
+
+#==============================================================================
+# unlAnt: parse
+#------------------------------------------------------------------------------
+# Main script to parse a UNL document file
+#==============================================================================
+
+#==============================================================================
+# Importing required modules
+#==============================================================================
+
+import sys
+from subprocess import Popen, PIPE
+from antlr4 import FileStream, CommonTokenStream, InputStream
+
+
+#==============================================================================
+# Parameters
+#==============================================================================
+
+# ANTLR Grammar
+
+doc_grammar = 'grammar/doc/doc.g4'
+org_grammar = 'grammar/org/org.g4'
+unl_grammar = 'grammar/unl/unl.g4'
+
+
+#==============================================================================
+# Utilities
+#==============================================================================
+
+def run_command(cmd):
+    """ Run an external command, returning (exit code, stdout, stderr) """
+    with Popen(cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True) as p:
+        # communicate() waits for the process to terminate and drains
+        # both pipes completely
+        stdout, stderr = p.communicate()
+    return p.returncode, stdout, stderr
+
+     
+#==============================================================================
+# Parsing Functions
+#==============================================================================
+
+def create_lexer_parser_with_antlr(grammar_file):
+    """ Create python lexer and parser using ANTLR4 """
+
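+    # note: this assumes an 'antlr4' launcher is available on the PATH (for
+    # instance a shell alias for the ANTLR jar, or the antlr4-tools package);
+    # requirements.txt pins only the Python runtime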
+    cmd = ['antlr4', '-Dlanguage=Python3', grammar_file]
+    print("--- Create python lexer and parser (run command: " + str(cmd) + ")")
+    code, out, err = run_command(cmd)
+    if code != 0:
+        sys.exit("Error in grammar:\n\n" + err)
+
+
+def instantiate_lexer_parser(input, antLexer, antParser):
+    """ Instantiate lexer and parser """
+    
+    print("--- Instantiate lexer and parser")
+    lexer = antLexer(input)
+    stream = CommonTokenStream(lexer)
+    parser = antParser(stream)
+    
+    return parser
+        
+    
+def parse_document(input):
+    
+    # -- Create python lexer and parser
+    create_lexer_parser_with_antlr(doc_grammar)
+    
+    # -- Import Lexer/Parser (after creation by ANTLR4)
+    from grammar.doc.docLexer import docLexer
+    from grammar.doc.docParser import docParser
+    
+    # -- Parse document
+    parser = instantiate_lexer_parser(input, docLexer, docParser)
+    print("--- Parse document to separate org part and unl part")
+    tree = parser.document()
+    print("----- resulting tree:\n" + tree.toStringTree(recog=parser))    
+    document = tree.out
+    
+    return document
+    
+
+def parse_org(input):
+    pass 
+    
+
+def parse_unl(input):
+    pass  
+    
+     
+#==============================================================================
+# Main Function
+#============================================================================== 
+
+def main(argv):
+    
+    # -- Read input file
+    input_file = argv[1]
+    input = FileStream(input_file)
+    
+    # -- Document Parsing
+    print("-- Document Parsing ")
+    document = parse_document(input)
+    org_part = document.sentence.org_part.to_string()
+    unl_part = document.sentence.unl_part.to_string()
+    print("----- org_part:\n" + org_part)
+    print("----- unl_part:\n" + unl_part)
+
+
+if __name__ == '__main__':
+    main(sys.argv)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..7545404
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+antlr4-python3-runtime==4.9.3
-- 
GitLab