Skip to content
Snippets Groups Projects
Select Git revision
  • 0ce9b7821e0d3f2ac0fc0b733ce743c831727df9
  • mui5-annotation-on-video-stable default
  • get_setter_canvasSizeInformations
  • fix-error-div-into-p
  • annotation-on-video-v2
  • detached
  • annotation-on-video-r17
  • mui5
  • mui5-react-18
  • jacob-test
  • annotation-on-video protected
  • master
  • test-antoinev1
  • 20-fetch-thumbnail-on-annotation
  • add-research-field
  • Save
  • add-plugin
  • 14-wip-no-seek-to
  • 14-bug-on-video-time-control
  • 9_wip_videotests
  • _upgrade_material_ui
  • latest-tetras-16
  • v3.3.0
  • v3.2.0
  • v3.1.1
  • v3.1.0
  • v3.0.0
  • v3.0.0-rc.7
  • v3.0.0-rc.6
  • v3.0.0-rc.5
  • v3.0.0-rc.4
  • v3.0.0-rc.3
  • v3.0.0-rc.2
  • v3.0.0-rc.1
  • v3.0.0-beta.10
  • v3.0.0-beta.9
  • v3.0.0-beta.8
  • v3.0.0-beta.7
  • v3.0.0-beta.6
  • v3.0.0-beta.5
  • v3.0.0-beta.3
41 results

WindowCanvasNavigationControlsVideo.js

Blame
  • unlizeXml.py 4.76 KiB
    #!/usr/bin/env python3
    
    from lxml import etree, objectify
    import requests
    import click
    import tempfile
    import os
    from subprocess import Popen, PIPE, STDOUT
    
    def remove_namespace(doc):
        """Strip the XML namespace from every element tag of *doc*, in place.

        Each element's tag is replaced by its local name (the part after the
        ``{namespace}`` prefix). Nothing is returned; *doc* itself is modified.

        Args:
            doc: a parsed tree (or element) exposing ``iter()``.
        """
        # ``iter()`` is the supported replacement for the deprecated
        # ``getiterator()``.
        for elem in doc.iter():
            # Nodes such as processing instructions do not carry a string tag;
            # ``QName`` cannot parse those, so leave them untouched.
            if not isinstance(elem.tag, str):
                continue
            elem.tag = etree.QName(elem.tag).localname
    
    def unlize(text, lang, dry_run=False, timeout=120):
        """Send *text* to the unl.ru web service and return its UNL output.

        Args:
            text: the sentence(s) to convert.
            lang: value sent as the ``language`` form field.
            dry_run: when true, no request is made and a placeholder string
                embedding *text* is returned instead.
            timeout: seconds to wait for the server before giving up; without
                a timeout a stalled server would block the script forever.

        Returns:
            The response body starting at its first ``'['`` (an empty string
            if the response contains no ``'['``), or an
            ``'Error calling unl.ru : ...'`` message if the request failed.
        """
        if dry_run:
            return 'I UNLized the following text : ###' + text + '###'

        url = "http://unl.ru/etap-cgi/etap-cgi-old/cgiunl.exe"

        data = {
            'DOMAIN': 'SPORT',
            'password': 'guest',
            'TAGERROR': 'NO',
            'username': 'UNL_guest',
            'conversion': 'true',
            'language': lang,
            'data': text,
            'outputmode': 'text',
            'coding': 'utf-8',
            'translate': 'Process'
        }

        # Only the network call is guarded: connection errors and timeouts are
        # reported as text so that one failing request does not abort the run.
        try:
            r = requests.post(url, data, timeout=timeout)
        except requests.RequestException as e:
            return 'Error calling unl.ru : "{error}"'.format(error=e)

        # Remove garbage before first '['
        return ''.join(r.text.partition('[')[1:])
    
    
    def nestedBody2Str(b):
        """Flatten the child nodes of *b* into a single string.

        Direct text children are stripped and appended as-is. For every other
        child, all of its descendant texts are stripped, the empty ones are
        dropped, and the rest are joined with ``', '``, preceded by a space
        and followed by ``'. '``.

        Args:
            b: a node exposing ``xpath()``.

        Returns:
            The concatenated text with doubled punctuation collapsed.
        """
        parts = []
        for child in b.xpath('./node()'):
            # Text results are string objects (a str subclass in the original
            # check); ``isinstance`` also accepts plain ``str`` results.
            if isinstance(child, str):
                parts.append(str(child).strip())
            else:
                nested = (str(t).strip() for t in child.xpath('.//text()'))
                parts.append(" " + ', '.join(s for s in nested if s) + ". ")

        result = ''.join(parts)
        # Joining can produce doubled punctuation such as '..' or '.,'.
        return (result.replace('.,', '.')
                      .replace('..', '.')
                      .replace(',,', ',')
                      .replace(';,', ';'))
    
    
    def addSubElement(parent, tag, text):
        """Append a *tag* child to *parent* whose content is *text* as CDATA.

        CRLF line endings in *text* are converted to LF before it is stored.
        The newly created child element is returned.
        """
        child = etree.SubElement(parent, tag)
        normalized = text.replace("\r\n", "\n")
        child.text = etree.CDATA(normalized)
        return child
    
    
    def unl2dot(text, path):
        """Render UNL *text* as an SVG graph using the unltools jar and ``dot``.

        The text is written to a temporary file, converted to a ``.dot`` file
        by ``java -jar path``, and that file is then rendered with
        ``dot -Tsvg``. Failures of either tool are reported on stdout.

        Args:
            text: the UNL text to convert.
            path: path of the unltools jar passed to ``java -jar``.

        Returns:
            The SVG document as a string, or ``""`` if ``dot`` failed.
        """
        # A private temporary directory receives the generated file: the name
        # cannot collide with another process and everything in it is removed
        # on exit, whether or not rendering succeeds.
        with tempfile.TemporaryDirectory() as out_dir:
            out_name = 'unl_graph'

            with tempfile.NamedTemporaryFile(mode="w") as in_file:
                # Remove CRLF and flush output to avoid java errors
                in_file.write(text.replace("\r\n", "\n"))
                in_file.flush()

                # Run java parser
                cmd = ['java', '-jar', path,
                       '--input-file', in_file.name,
                       '--output-Dir', out_dir, '--output-file', out_name,
                       '--output-type', 'dot']

                with Popen(cmd, stdout=PIPE, stderr=STDOUT) as p:
                    # communicate() drains the pipe while waiting; wait() alone
                    # can deadlock once the child fills the pipe buffer.
                    java_output, _ = p.communicate()
                    if p.returncode != 0:
                        print("Error in unl2rdf: \n\n"+java_output.decode())
                        print('UNL:')
                        print(text)

            # generate dot output
            fname = os.path.join(out_dir, out_name + '.dot')
            cmd = ['dot', '-Tsvg', fname]
            with Popen(cmd, stdout=PIPE, stderr=PIPE) as p:
                svg, dot_errors = p.communicate()
                if p.returncode == 0:
                    return svg.decode()

                print("Error creating svg: \n\n"+dot_errors.decode())
                print('UNL:')
                print(text)
                try:
                    with open(fname) as f:
                        print('DOT:')
                        print(f.read())
                except FileNotFoundError:
                    # The java step produced no file; nothing more to show.
                    pass

        return ""
    
    
    @click.command()
    @click.argument('input', nargs=1,
                    type=click.Path(dir_okay=False, exists=True))
    @click.argument('output', nargs=1,
                    type=click.Path(dir_okay=False, writable=True))
    @click.option('--lang', default='en',
                  type=click.Choice(['en', 'ru']))
    @click.option('--dry-run/--no-dry-run', default=False,
                  help='if true do not send request to unl.ru')
    @click.option('--svg/--no-svg', default=True,
                  help='Add svg node representing unl graph')
    @click.option('--unltools-path', nargs=1,
                  type=click.Path(dir_okay=False),
                  default='unl2rdf-app-1.0-SNAPSHOT-jar-with-dependencies.jar',
                  help='Path of the unltools jar')
    def unlizeXml(input, output, lang, dry_run, svg, unltools_path):
        # Command entry point: parse the XML file `input`, enrich it, and write
        # the result to `output`.
        #
        # For every title / text_body / term / meaning element that has text:
        #   * the original text is moved into an 'orig' child,
        #   * a 'unl' child receives the UNL conversion of the element content,
        #   * an 'svg' child with the rendered graph is added, unless svg is
        #     disabled or this is a dry run.
        # (Kept as comments: click would print a docstring as --help text.)
        xml_parser = etree.XMLParser(remove_comments=True)
        doc = objectify.parse(input, parser=xml_parser)
        remove_namespace(doc)

        want_svg = svg and not dry_run
        for tag_name in ('title', 'text_body', 'term', 'meaning'):
            for node in doc.xpath('//' + tag_name):
                if not node.text:
                    continue
                # 'orig' must be attached before the text is flattened, since
                # nestedBody2Str reads the element's children.
                addSubElement(node, 'orig', node.text)
                node.text = ""
                unl_text = unlize(nestedBody2Str(node), lang, dry_run)
                unl = addSubElement(node, 'unl', unl_text)
                if want_svg:
                    addSubElement(node, 'svg', unl2dot(unl.text, unltools_path))

        with open(output, 'w') as out:
            serialized = etree.tostring(doc, pretty_print=True)
            out.write(serialized.decode('utf-8'))
    
    
    # Run the click command only when this file is executed as a script.
    if __name__ == '__main__':
        unlizeXml()