o
    i                     @   s   d dl Z G dd dZdS )    Nc                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	DependencyEvaluatora5  
    Class for measuring labelled and unlabelled attachment score for
    dependency parsing. Note that the evaluation ignores punctuation.

    >>> from nltk.parse import DependencyGraph, DependencyEvaluator

    >>> gold_sent = DependencyGraph("""
    ... Pierre  NNP     2       NMOD
    ... Vinken  NNP     8       SUB
    ... ,       ,       2       P
    ... 61      CD      5       NMOD
    ... years   NNS     6       AMOD
    ... old     JJ      2       NMOD
    ... ,       ,       2       P
    ... will    MD      0       ROOT
    ... join    VB      8       VC
    ... the     DT      11      NMOD
    ... board   NN      9       OBJ
    ... as      IN      9       VMOD
    ... a       DT      15      NMOD
    ... nonexecutive    JJ      15      NMOD
    ... director        NN      12      PMOD
    ... Nov.    NNP     9       VMOD
    ... 29      CD      16      NMOD
    ... .       .       9       VMOD
    ... """)

    >>> parsed_sent = DependencyGraph("""
    ... Pierre  NNP     8       NMOD
    ... Vinken  NNP     1       SUB
    ... ,       ,       3       P
    ... 61      CD      6       NMOD
    ... years   NNS     6       AMOD
    ... old     JJ      2       NMOD
    ... ,       ,       3       AMOD
    ... will    MD      0       ROOT
    ... join    VB      8       VC
    ... the     DT      11      AMOD
    ... board   NN      9       OBJECT
    ... as      IN      9       NMOD
    ... a       DT      15      NMOD
    ... nonexecutive    JJ      15      NMOD
    ... director        NN      12      PMOD
    ... Nov.    NNP     9       VMOD
    ... 29      CD      16      NMOD
    ... .       .       9       VMOD
    ... """)

    >>> de = DependencyEvaluator([parsed_sent],[gold_sent])
    >>> las, uas = de.eval()
    >>> las
    0.6
    >>> uas
    0.8
    >>> abs(uas - 0.8) < 0.00001
    True
    c                 C   s   || _ || _dS )z
        :param parsed_sents: the list of parsed_sents as the output of parser
        :type parsed_sents: list(DependencyGraph)
        N)_parsed_sents_gold_sents)selfparsed_sents
gold_sents r   N/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/nltk/parse/evaluate.py__init__G   s   
zDependencyEvaluator.__init__c                    s    h d d  fdd|D S )z
        Function to remove punctuation from Unicode string.
        :param input: the input string
        :return: Unicode string after remove all punctuation
        >   PcPdPePfPiPoPs c                 3   s"    | ]}t | vr|V  qd S )N)unicodedatacategory).0xpunc_catr   r	   	<genexpr>V   s     z4DependencyEvaluator._remove_punct.<locals>.<genexpr>)join)r   inStrr   r   r	   _remove_punctO   s   z!DependencyEvaluator._remove_punctc           
      C   s  t | jt | jkrtdd}d}d}tt | jD ]`}| j| j}| j| j}t |t |kr5td| D ]A\}}|| }	|d du rHq9|d |	d krTtd| |d dkr^q9|d7 }|d	 |	d	 krz|d7 }|d
 |	d
 krz|d7 }q9q|| || fS )z
        Return the Labeled Attachment Score (LAS) and Unlabeled Attachment Score (UAS)

        :return : tuple(float,float)
        zE Number of parsed sentence is different with number of gold sentence.r   z!Sentences must have equal length.wordNz!Sentence sequence is not matched.r      headrel)lenr   r   
ValueErrorrangenodesitemsr   )
r   corrcorrLtotaliparsed_sent_nodesgold_sent_nodesparsed_node_addressparsed_node	gold_noder   r   r	   evalX   s8   zDependencyEvaluator.evalN)__name__
__module____qualname____doc__r
   r   r/   r   r   r   r	   r      s
    :	r   )r   r   r   r   r   r	   <module>   s   