o
    il?                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ dd Zd	d
 Zdd ZG dd deZedkrY	 d dlZe  dS dS )    N)ZipFilePathPointer)find_dir	find_filefind_jars_within_path)ParserI)DependencyGraph)taggedsents_to_conllc                  C   s   ddl m}  | g d}|jS )Nr   )RegexpTagger))z\.$.)z\,$,)z\?$?)z\($()z\)$))z\[$[)z\]$])z^-?[0-9]+(\.[0-9]+)?$CD)z(The|the|A|a|An|an)$DT)z&(He|he|She|she|It|it|I|me|Me|You|you)$PRP)z(His|his|Her|her|Its|its)$PRP$)z(my|Your|your|Yours|yours)$r   )z (on|On|in|In|at|At|since|Since)$IN)z (for|For|ago|Ago|before|Before)$r   )z(till|Till|until|Until)$r   )z(by|By|beside|Beside)$r   )z(under|Under|below|Below)$r   )z(over|Over|above|Above)$r   )z (across|Across|through|Through)$r   )z(into|Into|towards|Towards)$r   )z(onto|Onto|from|From)$r   )z.*able$JJ)z.*ness$NN)z.*ly$RB)z.*s$NNS)z.*ing$VBG)z.*ed$VBD)z.*r   )nltk.tagr	   tag)r	   _tagger r   J/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/nltk/parse/malt.pymalt_regex_tagger   s
    r!   c                 C   st   t j| r	| }nt| dd}g d}tt|}dd |D }h d}||s+J ttdd |s6J t	|S )	zE
    A module to find MaltParser .jar file and its dependencies.
    )MALT_PARSER)env_vars) r$   r$   c                 S   s   h | ]
}t j|d  qS )   )ospathsplit).0jarr   r   r    	<setcomp>H   s    z"find_maltparser.<locals>.<setcomp>>   	log4j.jar
libsvm.jarliblinear-1.8.jarc                 S   s   |  do	| dS )Nzmaltparser-z.jar)
startswithendswith)ir   r   r    <lambda>M   s    z!find_maltparser.<locals>.<lambda>)
r&   r'   existsr   setr   issubsetanyfilterlist)parser_dirname	_malt_dirmalt_dependencies
_malt_jars_jarsr   r   r    find_maltparser=   s   r>   c                 C   s*   | du rdS t j| r| S t| dddS )z8
    A module to find pre-trained MaltParser model.
    Nmalt_temp.mco)
MALT_MODELF)r#   verbose)r&   r'   r3   r   )model_filenamer   r   r    find_malt_modelR   s
   rC   c                   @   sb   e Zd ZdZ				dddZddd	Zdd
dZdddZedddZ	dddZ
dddZdS )
MaltParsera  
    A class for dependency parsing with MaltParser. The input is the paths to:
    - (optionally) a maltparser directory
    - (optionally) the path to a pre-trained MaltParser .mco model file
    - (optionally) the tagger to use for POS tagging before parsing
    - (optionally) additional Java arguments

    Example:
        >>> from nltk.parse import malt
        >>> # With MALT_PARSER and MALT_MODEL environment set.
        >>> mp = malt.MaltParser(model_filename='engmalt.linear-1.7.mco') # doctest: +SKIP
        >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
        (shot I (elephant an) (in (pajamas my)) .)
        >>> # Without MALT_PARSER and MALT_MODEL environment.
        >>> mp = malt.MaltParser('/home/user/maltparser-1.9.2/', '/home/user/engmalt.linear-1.7.mco') # doctest: +SKIP
        >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
        (shot I (elephant an) (in (pajamas my)) .)
    r$   Nc                 C   sZ   t || _|dur|ng | _t|| _| jdk| _t | _|dur'|| _
dS t	 | _
dS )a  
        An interface for parsing with the Malt Parser.

        :param parser_dirname: The path to the maltparser directory that
            contains the maltparser-1.x.jar
        :type parser_dirname: str
        :param model_filename: The name of the pre-trained model with .mco file
            extension. If provided, training will not be required.
            (see http://www.maltparser.org/mco/mco.html and
            see http://www.patful.com/chalk/node/185)
        :type model_filename: str
        :param tagger: The tagger used to POS tag the raw string before
            formatting to CONLL format. It should behave like `nltk.pos_tag`
        :type tagger: function
        :param additional_java_args: This is the additional Java arguments that
            one can use when calling Maltparser, usually this is the heapsize
            limits, e.g. `additional_java_args=['-Xmx1024m']`
            (see https://javarevisited.blogspot.com/2011/05/java-heap-space-memory-size-jvm.html)
        :type additional_java_args: list
        Nr?   )r>   	malt_jarsadditional_java_argsrC   model_trainedtempfile
gettempdirworking_dirr!   tagger)selfr9   rB   rL   rF   r   r   r    __init__r   s   


zMaltParser.__init__Fnullc              
   c   s   | j stdtjd| jddd}tjd| jddd~}t|D ]	}|t| q"|  | j	|j
|j
dd}t }zttj| jd	  W n   Y | ||}	t| |	d	krktd
d||	f t|j
}
|
 dD ]}tt||dgV  qxW d   n1 sw   Y  W d   n1 sw   Y  W d   n1 sw   Y  t|j
 t|j
 dS )a  
        Use MaltParser to parse multiple POS tagged sentences. Takes multiple
        sentences where each sentence is a list of (word, tag) tuples.
        The sentences must have already been tokenized and tagged.

        :param sentences: Input sentences to parse
        :type sentence: list(list(tuple(str, str)))
        :return: iter(iter(``DependencyGraph``)) the dependency graph
            representation of each sentence
        z0Parser has not been trained. Call train() first.zmalt_input.conll.wFprefixdirmodedeletezmalt_output.conll.parserT   r   z0MaltParser parsing (%s) failed with exit code %d z

top_relation_labelN)rH   	ExceptionrI   NamedTemporaryFilerK   r   writestrclosegenerate_malt_commandnamer&   getcwdchdirr'   r(   rG   _executejoinopenreaditerr   remove)rM   	sentencesrA   rZ   
input_fileoutput_filelinecmd_current_pathretinfiletree_strr   r   r    parse_tagged_sents   sd   


1zMaltParser.parse_tagged_sentsc                    s"    fdd|D } j |||dS )an  
        Use MaltParser to parse multiple sentences.
        Takes a list of sentences, where each sentence is a list of words.
        Each sentence will be automatically tagged with this
        MaltParser instance's tagger.

        :param sentences: Input sentences to parse
        :type sentence: list(list(str))
        :return: iter(DependencyGraph)
        c                 3   s    | ]}  |V  qd S )N)rL   )r)   sentencerM   r   r    	<genexpr>       z)MaltParser.parse_sents.<locals>.<genexpr>rY   )rs   )rM   rj   rA   rZ   tagged_sentencesr   ru   r    parse_sents   s   zMaltParser.parse_sentsc                 C   s   dg}|| j 7 }tjdrdnd}|d|| jg7 }|dg7 }tj| j	r6|dtj
| j	d g7 }n|d| j	g7 }|d	|g7 }|d
krM|d|g7 }|d|g7 }|S )a  
        This function generates the maltparser command use at the terminal.

        :param inputfilename: path to the input file
        :type inputfilename: str
        :param outputfilename: path to the output file
        :type outputfilename: str
        javawin;:z-cpzorg.maltparser.Maltz-cz-irV   z-oz-m)rF   sysplatformr/   re   rE   r&   r'   r3   rG   r(   )rM   inputfilenameoutputfilenamerT   rn   classpaths_separatorr   r   r    r`      s    



z MaltParser.generate_malt_commandc                 C   s&   |rd nt j}t j| ||d}| S )N)stdoutstderr)
subprocessPIPEPopenwait)rn   rA   outputpr   r   r    rd     s   zMaltParser._executec                 C   sv   t jd| jddd}ddd |D }|t| W d   n1 s&w   Y  | j|j|d	 t	|j dS )
z
        Train MaltParser from a list of ``DependencyGraph`` objects

        :param depgraphs: list of ``DependencyGraph`` objects for training input data
        :type depgraphs: DependencyGraph
        malt_train.conll.rP   FrQ   
c                 s   s    | ]}| d V  qdS )
   N)to_conll)r)   dgr   r   r    rv      rw   z#MaltParser.train.<locals>.<genexpr>NrA   )
rI   r\   rK   re   r]   r^   train_from_filera   r&   ri   )rM   	depgraphsrA   rk   	input_strr   r   r    train  s   	
zMaltParser.trainc              	   C   s   t |trFtjd| jddd/}| }| }|t| W d   n1 s*w   Y  | j	|j
|dW  d   S 1 sAw   Y  | j|dd}| ||}|d	krbtd
d||f d| _dS )z
        Train MaltParser from a file
        :param conll_file: str for the filename of the training input data
        :type conll_file: str
        r   rP   FrQ   Nr   learnrW   r   z1MaltParser training (%s) failed with exit code %drX   T)
isinstancer   rI   r\   rK   rf   rg   r]   r^   r   ra   r`   rd   r[   re   rH   )rM   
conll_filerA   rk   conll_input_file	conll_strrn   rp   r   r   r    r   '  s(   
	

 	
zMaltParser.train_from_file)r$   NNN)FrO   )NN)F)__name__
__module____qualname____doc__rN   rs   ry   r`   staticmethodrd   r   r   r   r   r   r    rD   ^   s    

*
B
 
rD   __main__)inspectr&   r   r   rI   	nltk.datar   nltk.internalsr   r   r   nltk.parse.apir   nltk.parse.dependencygraphr   nltk.parse.utilr   r!   r>   rC   rD   r   doctesttestmodr   r   r   r    <module>   s(   	& gB