o
    iI                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ dZG dd	 d	eZG d
d deZG dd deZG dd deZdS )    N)PIPE)_java_optionsconfig_javafind_jar_iterfind_jars_within_pathjava)ParserIDependencyGraph)Treez2https://nlp.stanford.edu/software/lex-parser.shtmlc                   @   s   e Zd ZdZdZdZdZdZdZ								
dddZ	dd Z
dddZdddZdddZdddZdddZdddZdS )GenericStanfordParserz Interface to the Stanford Parserz+stanford-parser-(\d+)(\.(\d+))+-models\.jarzstanford-parser\.jarz3edu.stanford.nlp.parser.lexparser.LexicalizedParserFN4edu/stanford/nlp/models/lexparser/englishPCFG.ser.gzutf8-mx4g c              
   C   s   t t| j|ddt|dddd d}t t| j|ddt|ddd	d d}	tj|d
 }
t|	gt	|
 | _
|| _|| _|| _|| _d S )N)STANFORD_PARSERSTANFORD_CORENLP T)env_vars
searchpathurlverboseis_regexc                 S      t j| S Nospathdirname
model_pathr   r   N/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/nltk/parse/stanford.py<lambda>;       z0GenericStanfordParser.__init__.<locals>.<lambda>)key)STANFORD_MODELSr   c                 S   r   r   r   r   r   r   r!   r"   H   r#   r   )maxr   _JAR_stanford_url_MODEL_JAR_PATTERNr   r   splittupler   
_classpathr    	_encodingcorenlp_optionsjava_options)selfpath_to_jarpath_to_models_jarr    encodingr   r/   r.   stanford_jar	model_jarstanford_dirr   r   r!   __init__&   s<   		
zGenericStanfordParser.__init__c              	   C   s   g }g }g }d}| dD ]?}|dkrE|r!|t| g }d}q| jr4|| d| g }d}q|t| d|g g }q|| d}qt|S )NFr   
T)
splitlinesappenditer_DOUBLE_SPACED_OUTPUT
_make_treejoin)r0   output_res	cur_lines	cur_treesblankliner   r   r!   _parse_trees_outputV   s&   
z)GenericStanfordParser._parse_trees_outputc              
   C   sB   | j d| jddd| jdddg
}| | |dd	d
 |D |S )a  
        Use StanfordParser to parse multiple sentences. Takes multiple sentences as a
        list where each sentence is a list of words.
        Each sentence will be automatically tagged with this StanfordParser instance's
        tagger.
        If whitespaces exists inside a token, then the token will be treated as
        separate tokens.

        :param sentences: Input sentences to parse
        :type sentences: list(list(str))
        :rtype: iter(iter(Tree))
        -model
-sentencesnewline-outputFormat
-tokenizedz-escaperz-edu.stanford.nlp.process.PTBEscapingProcessorr8   c                 s   s    | ]}d  |V  qdS ) Nr>   .0sentencer   r   r!   	<genexpr>       z4GenericStanfordParser.parse_sents.<locals>.<genexpr>_MAIN_CLASSr    _OUTPUT_FORMATrE   _executer>   r0   	sentencesr   cmdr   r   r!   parse_sentsm   s    z!GenericStanfordParser.parse_sentsc                 C      t | |g|S )a&  
        Use StanfordParser to parse a sentence. Takes a sentence as a string;
        before parsing, it will be automatically tokenized and tagged by
        the Stanford Parser.

        :param sentence: Input sentence to parse
        :type sentence: str
        :rtype: iter(Tree)
        )nextraw_parse_sentsr0   rO   r   r   r   r!   	raw_parse      
zGenericStanfordParser.raw_parsec                 C   s2   | j d| jddd| jg}| | |d||S )aI  
        Use StanfordParser to parse multiple sentences. Takes multiple sentences as a
        list of strings.
        Each sentence will be automatically tokenized and tagged by the Stanford Parser.

        :param sentences: Input sentences to parse
        :type sentences: list(str)
        :rtype: iter(iter(Tree))
        rF   rG   rH   rI   r8   rR   rV   r   r   r!   r\      s   	z%GenericStanfordParser.raw_parse_sentsc                 C   rZ   )a0  
        Use StanfordParser to parse a sentence. Takes a sentence as a list of
        (word, tag) tuples; the sentence must have already been tokenized and
        tagged.

        :param sentence: Input sentence to parse
        :type sentence: list(tuple(str, str))
        :rtype: iter(Tree)
        )r[   tagged_parse_sentsr]   r   r   r!   tagged_parse   r_   z"GenericStanfordParser.tagged_parsec                    sR   d | j d| jddd| jdd dd	d
dg}| | |d fdd|D |S )ad  
        Use StanfordParser to parse multiple sentences. Takes multiple sentences
        where each sentence is a list of (word, tag) tuples.
        The sentences must have already been tokenized and tagged.

        :param sentences: Input sentences to parse
        :type sentences: list(list(tuple(str, str)))
        :rtype: iter(iter(Tree))
        /rF   rG   rH   rI   rJ   z-tagSeparatorz-tokenizerFactoryz,edu.stanford.nlp.process.WhitespaceTokenizerz-tokenizerMethodnewCoreLabelTokenizerFactoryr8   c                 3   s(    | ]}d   fdd|D V  qdS )rK   c                 3   s    | ]}  |V  qd S r   rL   )rN   taggedtag_separatorr   r!   rP      rQ   zEGenericStanfordParser.tagged_parse_sents.<locals>.<genexpr>.<genexpr>NrL   rM   re   r   r!   rP      s
    
z;GenericStanfordParser.tagged_parse_sents.<locals>.<genexpr>rR   rV   r   re   r!   r`      s2   
z(GenericStanfordParser.tagged_parse_sentsc           	      C   s$  | j }|d|g | jr|| j  dt}t| j|d tj	dddT}t
|tr5|r5||}|| |  | jrS|d t|| j|ttd\}}n||j t|| jttd	\}}|d
d}|dd}||}W d    n1 sw   Y  t|j t|dd |S )Nz	-encodingrK   )optionsr   wbF)modedeleter   )	classpathstdinstdoutstderr)rk   rm   rn   s        s    )r-   extendr.   r*   r>   r   r   r/   tempfileNamedTemporaryFile
isinstancestrencodewriteflush
_USE_STDINseekr   r,   r   r:   namereplacedecoder   unlink)	r0   rX   input_r   r3   default_options
input_filerm   rn   r   r   r!   rU      s>   





zGenericStanfordParser._execute)NNr   r   Fr   r   F)__name__
__module____qualname____doc__r)   r'   rS   rx   r<   r7   rE   rY   r^   r\   ra   r`   rU   r   r   r   r!   r      s,    
0




'r   c                       ,   e Zd ZdZdZ fddZdd Z  ZS )StanfordParsera  
    >>> parser=StanfordParser(
    ...     model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
    ... ) # doctest: +SKIP

    >>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog")) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
    Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
    Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])]

    >>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents((
    ...     "the quick brown fox jumps over the lazy dog",
    ...     "the quick grey wolf jumps over the lazy fox"
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
    Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
    Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP',
    [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['grey']), Tree('NN', ['wolf'])]), Tree('NP',
    [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']),
    Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])]

    >>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents((
    ...     "I 'm a dog".split(),
    ...     "This is my friends ' cat ( the tabby )".split(),
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]),
    Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP',
    [Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']),
    Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', [Tree('', []),
    Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', [])])])])])])])]

    >>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents((
    ...     (
    ...         ("The", "DT"),
    ...         ("quick", "JJ"),
    ...         ("brown", "JJ"),
    ...         ("fox", "NN"),
    ...         ("jumped", "VBD"),
    ...         ("over", "IN"),
    ...         ("the", "DT"),
    ...         ("lazy", "JJ"),
    ...         ("dog", "NN"),
    ...         (".", "."),
    ...     ),
    ... ))],[]) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
    Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP',
    [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]
    pennc                    &   t jdtdd t j|i | d S )NzcThe StanfordParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPParser[0m instead.   
stacklevelwarningswarnDeprecationWarningsuperr7   r0   argskwargs	__class__r   r!   r7   F     zStanfordParser.__init__c                 C   s
   t |S r   )r   
fromstringr0   resultr   r   r!   r=   P  s   
zStanfordParser._make_treer   r   r   r   rT   r7   r=   __classcell__r   r   r   r!   r     s
    2
r   c                       r   )StanfordDependencyParsera
  
    >>> dep_parser=StanfordDependencyParser(
    ...     model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
    ... ) # doctest: +SKIP

    >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])])]

    >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
    [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')),
    ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')),
    ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')),
    ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents((
    ...     "The quick brown fox jumps over the lazy dog.",
    ...     "The quick grey wolf jumps over the lazy fox."
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])]),
    Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), Tree('fox', ['over', 'the', 'lazy'])])]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents((
    ...     "I 'm a dog".split(),
    ...     "This is my friends ' cat ( the tabby )".split(),
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['the'])])]

    >>> sum([[list(parse.triples()) for parse in dep_graphs] for dep_graphs in dep_parser.tagged_parse_sents((
    ...     (
    ...         ("The", "DT"),
    ...         ("quick", "JJ"),
    ...         ("brown", "JJ"),
    ...         ("fox", "NN"),
    ...         ("jumped", "VBD"),
    ...         ("over", "IN"),
    ...         ("the", "DT"),
    ...         ("lazy", "JJ"),
    ...         ("dog", "NN"),
    ...         (".", "."),
    ...     ),
    ... ))],[]) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [[((u'jumped', u'VBD'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')),
    ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')),
    ((u'jumped', u'VBD'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')),
    ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]]

    	conll2007c                    r   )NzwThe StanfordDependencyParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPDependencyParser[0m instead.r   r   r   r   r   r   r!   r7     r   z!StanfordDependencyParser.__init__c                 C      t |ddS )Nroottop_relation_labelr	   r   r   r   r!   r=        z#StanfordDependencyParser._make_treer   r   r   r   r!   r   T  s
    0
r   c                       sJ   e Zd ZdZdZdZdZdZdZdZ	 fddZ
dd
dZdd Z  ZS )StanfordNeuralDependencyParserar  
    >>> from nltk.parse.stanford import StanfordNeuralDependencyParser # doctest: +SKIP
    >>> dep_parser=StanfordNeuralDependencyParser(java_options='-mx4g')# doctest: +SKIP

    >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy']), '.'])]

    >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
    [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det',
    (u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'),
    u'amod', (u'brown', u'JJ')), ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')),
    ((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det',
    (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ')), ((u'jumps', u'VBZ'),
    u'punct', (u'.', u'.'))]]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents((
    ...     "The quick brown fox jumps over the lazy dog.",
    ...     "The quick grey wolf jumps over the lazy fox."
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over',
    'the', 'lazy']), '.']), Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']),
    Tree('fox', ['over', 'the', 'lazy']), '.'])]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents((
    ...     "I 'm a dog".split(),
    ...     "This is my friends ' cat ( the tabby )".split(),
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends',
    ['my', "'"]), Tree('tabby', ['-LRB-', 'the', '-RRB-'])])]
    conllz)edu.stanford.nlp.pipeline.StanfordCoreNLPz%stanford-corenlp-(\d+)(\.(\d+))+\.jarz,stanford-corenlp-(\d+)(\.(\d+))+-models\.jarTc                    s4   t jdtdd t j|i | |  jd7  _d S )Nz}The StanfordNeuralDependencyParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPDependencyParser[0m instead.r   r   z(-annotators tokenize,ssplit,pos,depparse)r   r   r   r   r7   r.   r   r   r   r!   r7     s   z'StanfordNeuralDependencyParser.__init__Fc                 C   s   t d)z
        Currently unimplemented because the neural dependency parser (and
        the StanfordCoreNLP pipeline class) doesn't support passing in pre-
        tagged tokens.
        zxtagged_parse[_sents] is not supported by StanfordNeuralDependencyParser; use parse[_sents] or raw_parse[_sents] instead.)NotImplementedError)r0   rW   r   r   r   r!   r`     s   z1StanfordNeuralDependencyParser.tagged_parse_sentsc                 C   r   )NROOTr   r	   r   r   r   r!   r=     r   z)StanfordNeuralDependencyParser._make_treer   )r   r   r   r   rT   rS   r'   r)   rx   r<   r7   r`   r=   r   r   r   r   r!   r     s    
r   )r   rq   r   
subprocessr   nltk.internalsr   r   r   r   r   nltk.parse.apir   nltk.parse.dependencygraphr
   	nltk.treer   r(   r   r   r   r   r   r   r   r!   <module>   s    vCA