o
    ii                     @   sH   d Z ddlT ddlT ddlmZmZ G dd deZG dd deZ	dS )	z
Indian Language POS-Tagged Corpus
Collected by A Kumaran, Microsoft Research, India
Distributed with permission

Contents:
  - Bangla: IIT Kharagpur
  - Hindi: Microsoft Research India
  - Marathi: IIT Bombay
  - Telugu: IIIT Hyderabad
    )*)map_tag	str2tuplec                   @   s8   e Zd ZdZdddZdddZdddZdd	d
ZdS )IndianCorpusReaderz@
    List of words, one per line.  Blank lines are ignored.
    Nc                 C      t dd | |dD S )Nc                 S   s   g | ]\}}t ||d d qS )FIndianCorpusView.0fileidenc r   T/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/nltk/corpus/reader/indian.py
<listcomp>!       z,IndianCorpusReader.words.<locals>.<listcomp>Tconcatabspathsselffileidsr   r   r   words   
   
zIndianCorpusReader.wordsc                    @   r j kr fddnd tfdd |dD S )Nc                       t  j| S Nr   _tagsettr   tagsetr   r   <lambda>)       z1IndianCorpusReader.tagged_words.<locals>.<lambda>c                    s    g | ]\}}t ||d d qS )TFr   r	   tag_mapping_functionr   r   r   -       z3IndianCorpusReader.tagged_words.<locals>.<listcomp>Tr   r   r   r   r   r!   r   r   r%   r!   r   tagged_words'      

zIndianCorpusReader.tagged_wordsc                 C   r   )Nc                 S   s   g | ]\}}t ||d dqS )FTr   r	   r   r   r   r   5   r   z,IndianCorpusReader.sents.<locals>.<listcomp>Tr   r   r   r   r   sents3   r   zIndianCorpusReader.sentsc                    r   )Nc                    r   r   r   r   r    r   r   r"   =   r#   z1IndianCorpusReader.tagged_sents.<locals>.<lambda>c                    s    g | ]\}}t ||d d  qS )Tr   r	   r$   r   r   r   A   r&   z3IndianCorpusReader.tagged_sents.<locals>.<listcomp>Tr'   r(   r   r)   r   tagged_sents;   r+   zIndianCorpusReader.tagged_sentsr   )NN)__name__
__module____qualname____doc__r   r*   r,   r-   r   r   r   r   r      s    


r   c                   @   s    e Zd Z	dddZdd ZdS )r   Nc                 C   s&   || _ || _|| _tj| ||d d S )N)encoding)_tagged_group_by_sent_tag_mapping_functionStreamBackedCorpusView__init__)r   corpus_filer2   taggedgroup_by_sentr%   r   r   r   r7   I   s   zIndianCorpusView.__init__c                    sd   |  }|drg S dd | D } jr  fdd|D } js*dd |D } jr0|gS |S )N<c                 S   s   g | ]}t |d dqS )_)sep)r   )r
   wordr   r   r   r   U   s    z/IndianCorpusView.read_block.<locals>.<listcomp>c                    s   g | ]\}}|  |fqS r   )r5   r
   wr   r   r   r   r   W   s    c                 S   s   g | ]\}}|qS r   r   r?   r   r   r   r   Y   s    )readline
startswithsplitr5   r3   r4   )r   streamlinesentr   rA   r   
read_blockQ   s   
zIndianCorpusView.read_blockr   )r.   r/   r0   r7   rH   r   r   r   r   r   H   s    
r   N)
r1   nltk.corpus.reader.apinltk.corpus.reader.utilnltk.tagr   r   CorpusReaderr   r6   r   r   r   r   r   <module>   s   .