o
    if                     @   sP   d Z ddlZddlT dddddd	d
ZeeZG dd dZG dd deZdS )a)  
Corpus reader for the Information Extraction and Entity Recognition Corpus.

NIST 1999 Information Extraction: Entity Recognition Evaluation
https://www.itl.nist.gov/iad/894.01/tests/ie-er/er_99/er_99.htm

This corpus contains the NEWSWIRE development test data for the
NIST 1999 IE-ER Evaluation.  The files were taken from the
subdirectory: ``/ie_er_99/english/devtest/newswire/*.ref.nwt``
and filenames were shortened.

The corpus contains the following files: APW_19980314, APW_19980424,
APW_19980429, NYT_19980315, NYT_19980403, and NYT_19980407.
    N)*z&Associated Press Weekly, 14 March 1998z&Associated Press Weekly, 24 April 1998z&Associated Press Weekly, 29 April 1998zNew York Times, 15 March 1998zNew York Times, 3 April 1998zNew York Times, 7 April 1998)APW_19980314APW_19980424APW_19980429NYT_19980315NYT_19980403NYT_19980407c                   @   s   e Zd ZdddZdd ZdS )IEERDocumentN c                 C   s"   || _ || _|| _|| _|| _d S N)textdocnodoctype	date_timeheadline)selfr   r   r   r   r    r   R/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/nltk/corpus/reader/ieer.py__init__+   s
   
zIEERDocument.__init__c                 C   sd   | j rd| j  }nddd | j D d d d }| jd ur.d| j d|dS d	| S )
N c                 S   s    g | ]}|d d dkr|qS )N   <r   ).0wr   r   r   
<listcomp>7   s     z)IEERDocument.__repr__.<locals>.<listcomp>   z...z<IEERDocument z: >z<IEERDocument: %r>)r   joinleavesr   r   )r   r   r   r   r   __repr__2   s   $
zIEERDocument.__repr__)NNNr
   )__name__
__module____qualname__r   r   r   r   r   r   r	   *   s    
r	   c                   @   s<   e Zd ZdZdddZdddZdd Zd	d
 Zdd ZdS )IEERCorpusReaderr   Nc                       t  fdd |dD S )Nc                        g | ]\}}t | j|d qS )encoding)StreamBackedCorpusView_read_blockr   fileidencr   r   r   r   D       z)IEERCorpusReader.docs.<locals>.<listcomp>Tconcatabspathsr   fileidsr   r-   r   docsB   
   

zIEERCorpusReader.docsc                    r$   )Nc                    r%   r&   )r(   _read_parsed_blockr*   r-   r   r   r   L   r.   z0IEERCorpusReader.parsed_docs.<locals>.<listcomp>Tr/   r2   r   r-   r   parsed_docsJ   r5   zIEERCorpusReader.parsed_docsc                    s    fdd  |D S )Nc                    s&   g | ]}  |jd ur  |qS r   )_parser   )r   docr-   r   r   r   T   s
    z7IEERCorpusReader._read_parsed_block.<locals>.<listcomp>)r)   )r   streamr   r-   r   r6   R   s   
z#IEERCorpusReader._read_parsed_blockc                 C   s0   t jj|dd}t|trtdi |S t|S )NDOCUMENT)
root_labelr   )nltkchunkieerstr2tree
isinstancedictr	   )r   r9   valr   r   r   r8   Z   s   
zIEERCorpusReader._parsec                 C   sd   g }	 |  }|s
n| dkrnq|| 	 |  }|sn|| | dkr+nqd|gS )NTz<DOC>z</DOC>
)readlinestripappendr   )r   r:   outliner   r   r   r)   a   s$   

zIEERCorpusReader._read_blockr   )	r    r!   r"   __doc__r4   r7   r6   r8   r)   r   r   r   r   r#   ?   s    

r#   )	rI   r=   nltk.corpus.reader.apititlessorted	documentsr	   CorpusReaderr#   r   r   r   r   <module>   s   
