o
    i                     @   sp   d dl Z d dlmZ d dlT d dlT G dd deZG dd deZdd	 Z	d
d Z
edkr6e	  e
  dS dS )    N)util)*c                   @   sR   e Zd ZdddZdddZdddZdd	d
ZdddZdddZdddZ	dS )ChasenCorpusReaderutf8Nc                 C   s   || _ t| ||| d S N)_sent_splitterCorpusReader__init__)selfrootfileidsencodingsent_splitter r   T/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/nltk/corpus/reader/chasen.pyr	      s   zChasenCorpusReader.__init__c                       t  fdd |dD S )Nc              	      $   g | ]\}}t ||d d d  jqS )FChasenCorpusViewr   .0fileidencr
   r   r   
<listcomp>       z,ChasenCorpusReader.words.<locals>.<listcomp>Tconcatabspathsr
   r   r   r   r   words   
   

zChasenCorpusReader.wordsc                    r   )Nc              	      $   g | ]\}}t ||d dd jqS TFr   r   r   r   r   r      r   z3ChasenCorpusReader.tagged_words.<locals>.<listcomp>Tr   r   r   r   r   tagged_words   r!   zChasenCorpusReader.tagged_wordsc                    r   )Nc              	      s$   g | ]\}}t ||d dd  jqS FTr   r   r   r   r   r   %   r   z,ChasenCorpusReader.sents.<locals>.<listcomp>Tr   r   r   r   r   sents#   r!   zChasenCorpusReader.sentsc                    r   )Nc              	      s$   g | ]\}}t ||d d d jqS r#   r   r   r   r   r   r   -   r   z3ChasenCorpusReader.tagged_sents.<locals>.<listcomp>Tr   r   r   r   r   tagged_sents+   r!   zChasenCorpusReader.tagged_sentsc                    r   )Nc              	      r"   r%   r   r   r   r   r   r   5   r   z,ChasenCorpusReader.paras.<locals>.<listcomp>Tr   r   r   r   r   paras3   r!   zChasenCorpusReader.parasc                    r   )Nc              	      r   )Tr   r   r   r   r   r   =   r   z3ChasenCorpusReader.tagged_paras.<locals>.<listcomp>Tr   r   r   r   r   tagged_paras;   r!   zChasenCorpusReader.tagged_paras)r   Nr   )
__name__
__module____qualname__r	   r    r$   r&   r'   r(   r)   r   r   r   r   r      s    





r   c                   @   s$   e Zd ZdZ	dddZdd ZdS )r   z
    A specialized corpus view for ChasenReader. Similar to ``TaggedCorpusView``,
    but this'll use fixed sets of word and sentence tokenizer.
    Nc                 C   s,   || _ || _|| _|| _tj| ||d d S )Nr   )_tagged_group_by_sent_group_by_parar   StreamBackedCorpusViewr	   )r
   corpus_filer   taggedgroup_by_sentgroup_by_parar   r   r   r   r	   J   s
   	zChasenCorpusView.__init__c           
      C   s  g }t |ddD ]|}g }g }| D ]E}| dk}|d}|d d|dd f}	|s3||	 |s=| jrW| |	rW| jsGdd	 |D }| jrP|| n|	| g }qt
|dkrv| jshd
d	 |D }| jrq|| n|	| | jr|| q|	| q|S )zReads one paragraph at a time..z^EOS\nEOS	r      Nc                 S      g | ]\}}|qS r   r   r   wtr   r   r   r   i       z/ChasenCorpusView.read_block.<locals>.<listcomp>c                 S   r:   r   r   r;   r   r   r   r   r   r>   )read_regexp_block
splitlinesstripsplitjoinappendr   r.   r/   extendlenr0   )
r
   streamblockpara_strparasentline_eos_cellsr<   r   r   r   
read_blockY   s8   



zChasenCorpusView.read_blockr   )r*   r+   r,   __doc__r	   rO   r   r   r   r   r   D   s
    
r   c                  C   sf   dd l } ddlm} |dtddd}td| dd	  td
dd | dd D  d S )Nr   LazyCorpusLoaderjeita.*chasenutf-8r-   /iTV  i|V  z
EOS
c                 s   s$    | ]}d  dd |D V  qdS )
c                 s   s.    | ]}d  |d |d dd V  qdS )z{}/{}r   r9   r8      N)formatrB   )r   r<   r   r   r   	<genexpr>   s   , z!demo.<locals>.<genexpr>.<genexpr>N)rC   )r   rK   r   r   r   rZ      s
    
zdemo.<locals>.<genexpr>iz  i}  )nltknltk.corpus.utilrR   r   printrC   r    r'   )r[   rR   rS   r   r   r   demo   s   
r^   c                  C   s:   ddl m}  | dtddd}t| d d tsJ d S )Nr   rQ   rS   rT   rU   r-   r9   )r\   rR   r   
isinstancer$   str)rR   rS   r   r   r   test   s   ra   __main__)sysnltk.corpus.readerr   nltk.corpus.reader.apinltk.corpus.reader.utilr   r   r1   r   r^   ra   r*   r   r   r   r   <module>   s   6=
