o
    im^                  	   @   s  d dl Zd dlZd dlZd dlmZmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ d dl m!Z! d dl"m#Z# dZ$dZ%d	Z&d
Z'dZ(dZ)dZ*dZ+i ddd ddd ddd ddd ddd ddd ddd ddd dd d d!d"d d#d$d d%d&d d'd(d d)d*d d+d,d d-d.d d/d0d d1d d2d d3d d4d d5d d6d d7d d8Z,G d9d: d:Z-G d;d< d<Z.d=d> Z/e0d?kre/  d>gZ1dS )@    N)ENDLEFTSUNKENButtonEntryFrameIntVarLabelMenu
OptionMenu	Scrollbar	StringVarTextTk)Font)
alpinobrowncess_catcess_espflorestaindian
mac_morphonps_chatsinica_treebanktreebank)ShowText)in_idlez[^/ ]+z\bz<<CL_EVENT>>z<<ST_EVENT>>z<<SE_EVENT>>z<<ELC_EVENT>>2   z)English: Brown Corpus (Humor, simplified)z%Catalan: CESS-CAT Corpus (simplified)c                   C      t jddS N	universal)tagset)r   tagged_sents r#   r#   S/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/nltk/app/concordance_app.py<lambda>;       r%   zEnglish: Brown Corpusc                   C      t  S Nr   r"   r#   r#   r#   r$   r%   >       z"English: Brown Corpus (simplified)c                   C   r   r   r)   r#   r#   r#   r$   r%   ?   r&   z)English: Brown Corpus (Press, simplified)c                   C   s   t jg dddS )N)news	editorialreviewsr    
categoriesr!   r)   r#   r#   r#   r$   r%   B   s    z,English: Brown Corpus (Religion, simplified)c                   C      t jdddS )Nreligionr    r.   r)   r#   r#   r#   r$   r%   E       z+English: Brown Corpus (Learned, simplified)c                   C   r0   )Nlearnedr    r.   r)   r#   r#   r#   r$   r%   H   r2   z3English: Brown Corpus (Science Fiction, simplified)c                   C   r0   )Nscience_fictionr    r.   r)   r#   r#   r#   r$   r%   K   r2   z+English: Brown Corpus (Romance, simplified)c                   C   r0   )Nromancer    r.   r)   r#   r#   r#   r$   r%   N   r2   c                   C   r0   )Nhumorr    r.   r)   r#   r#   r#   r$   r%   Q   r2   zEnglish: NPS Chat Corpusc                   C   r'   r(   r   tagged_postsr#   r#   r#   r$   r%   T   r*   z%English: NPS Chat Corpus (simplified)c                   C   r   r   r7   r#   r#   r#   r$   r%   U   r&   z#English: Wall Street Journal Corpusc                   C   r'   r(   r   r"   r#   r#   r#   r$   r%   X   r*   z0English: Wall Street Journal Corpus (simplified)c                   C   r   r   r9   r#   r#   r#   r$   r%   Y   r&   zChinese: Sinica Corpusc                   C   r'   r(   r   r"   r#   r#   r#   r$   r%   \   r*   z#Chinese: Sinica Corpus (simplified)c                   C   r   r   r:   r#   r#   r#   r$   r%   ]   r&   zDutch: Alpino Corpusc                   C   r'   r(   r   r"   r#   r#   r#   r$   r%   `   r*   z!Dutch: Alpino Corpus (simplified)c                   C   r   r   r;   r#   r#   r#   r$   r%   a   r&   c                   C   r   )N	hindi.pos)filesr   r"   r#   r#   r#   r$   r%   d   s    c                   C   r0   )Nr<   r    )r=   r!   r>   r#   r#   r#   r$   r%   e   r2   c                   C   r'   r(   r   r"   r#   r#   r#   r$   r%   h   r*   c                   C   r   r   r?   r#   r#   r#   r$   r%   i   r&   c                   C   r'   r(   r   r"   r#   r#   r#   r$   r%   l   r*   c                   C   r   r   r@   r#   r#   r#   r$   r%   m   r&   c                   C   r   r   )r   r"   r#   r#   r#   r$   r%   p   r&   )zHindi: Indian Languages Corpusz+Hindi: Indian Languages Corpus (simplified)z&Portuguese: Floresta Corpus (Portugal)z2Portuguese: Floresta Corpus (Portugal, simplified)z&Portuguese: MAC-MORPHO Corpus (Brazil)z2Portuguese: MAC-MORPHO Corpus (Brazil, simplified)z%Spanish: CESS-ESP Corpus (simplified)c                   @   sD  e Zd ZdZdZdZdZdZdZdd Z	d	d
 Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Z d7d8 Z!d9d: Z"d;d< Z#d=d> Z$d?d@ Z%dAdB Z&dCdD Z'dEdF Z(dGdH Z)dIdJ Z*dKdL Z+dMdN Z,dOS )PConcordanceSearchViewz#FFFz#F00
HL_WRD_TAGz#C0C0C0
HL_LBL_TAGg333333?c                 C   sb   t  | _t| j| _t | _| | j |   | 	| j | 
| jj | jt| j| _d S r(   )qQueuequeueConcordanceSearchModelmodelr   top	_init_top_init_menubar_init_widgetsload_corpusDEFAULT_CORPUSafterPOLL_INTERVAL_pollselfr#   r#   r$   __init__   s   
zConcordanceSearchView.__init__c                 C   s@   | d |d |d| j |d| j |dd d S )Nz950x680+50+50zNLTK Concordance Searchz<Control-q>WM_DELETE_WINDOWi  i  )geometrytitlebinddestroyprotocolminsize)rS   rI   r#   r#   r$   rJ      s
   

zConcordanceSearchView._init_topc                 C   sj   t |t| jdddd| _| | j | | j | | j | | j | | j | jj	ddd d S )N   )
backgroundpadxpadyborderbothT)fillexpand)
r   dict_BACKGROUND_COLOUR
main_frame_init_corpus_select_init_query_box_init_results_box_init_paging_init_statuspackrS   parentr#   r#   r$   rL      s   z#ConcordanceSearchView._init_widgetsc                 C   s  t | j| _t | j| _t | j| _t| j}t|ddd}|jdd| jdd |jdd|d t|dd	}t|dd	}|j	d
| jdd| j
d |j	d| jdd| j
d |j	d| jdd| j
d |d |jdd|d t|dd	}t|dd	}|j	d| jdd| jd |j	d| jdd| jd |j	d| jdd| jd |d |jdd|d t|dd	}|j	d| jdd| jd |j	d| jdd| jd |j	d| jdd| jd |d |jdd|d |jdd|d |jd d|d | jj|d! d S )"Nr   )tearoffborderwidthExitr\   zCtrl-q)label	underlinecommandacceleratorFile)rr   rs   menu)ro   20   )rr   variablers   valuert   50r   100d   zResult Countz60 characters<   z80 charactersP   z100 charactersBeforez70 charactersF   z90 charactersZ   z110 charactersn   AfterContextEdit)rw   )r   rI   _result_size_cntx_bf_len_cntx_af_lenr
   add_commandrY   add_cascadeadd_radiobuttonset_result_sizeinvokeset_cntx_bf_lenset_cntx_af_lenconfig)rS   menubarfilemenueditmenu
rescntmenucntxmenu
cntxbfmenu
cntxafmenur#   r#   r$   rK      s   




z#ConcordanceSearchView._init_menubarc                 K   s   | j  | j_d S r(   )r   getrH   result_countrS   kwargsr#   r#   r$   r      s   z%ConcordanceSearchView.set_result_sizec                 K      | j  | _d S r(   )r   r   _char_afterr   r#   r#   r$   r        z%ConcordanceSearchView.set_cntx_af_lenc                 K   r   r(   )r   r   _char_beforer   r#   r#   r$   r     r   z%ConcordanceSearchView.set_cntx_bf_lenc              	   C   s   t || jd}t|| _| j| jj t|td| jddddj	dd t
| jj | jj}t|| j| jjg| j R d	| ji}d|d
< d|d< |j	dd |j	dddd d S )Nr]   z	 Corpus:    r\   r   )justifytextr]   r^   r_   r`   left)sidert   rp   highlightthicknessrI   xnr   rb   anchor)r   re   r   varsetrH   rN   r	   r   rl   listCORPORAkeysremover   non_default_corporacorpus_selected)rS   rn   
innerframeother_corporaomr#   r#   r$   rg     s<   

z)ConcordanceSearchView._init_corpus_selectc              	   C   s.   t |tt| jdddd| _| jjddd d S )Nr   r\   )r   reliefr]   r`   r^   r_   rI   swr   r   )r	   r   r   re   statusrl   rm   r#   r#   r$   rk   $  s   	z"ConcordanceSearchView._init_statusc                 C   s   t || jd}t || jd}t|dd| _| jjddddd t|d	| jd
d
d| _| jjddddd | jd| j	 |  |jdddd d S )Nr   r   )widthr   r      center)r   rb   r_   r   Searchr\   )r   rt   rp   r   z<KeyPress-Return>rI   r   r   )
r   re   r   	query_boxrl   r   searchsearch_buttonrX   search_enter_keypress_handler)rS   rn   r   anotherr#   r#   r$   rh   0  s   z%ConcordanceSearchView._init_query_boxc                 G   s   |    d S r(   )r   rS   eventr#   r#   r$   r   A  s   z3ConcordanceSearchView.search_enter_keypress_handlerc                 C   s(  t |}t |}t |}t|dd}t|ddd}t|tddddd|j|jd	d
ddd
| _| jjdddd | jj| j| j	d | jj| j
| jd |jdddd |j| jjd |jddddd |j| jjd t|d| jdjddd |jddddd |jdddd |jdddd d S ) Nr\   )rp   horiz)rp   orientcourier16)familysizedisablednone40rx   )	fontstaterp   yscrollcommandxscrollcommandwrapr   heightexportselectionr   ra   T)r   rb   rc   )
foregroundyer   )rt   r   w)r   rb   rc   r   z   )r   r]   r   rI   r   bottoms)r   r   r   r   r   results_boxrl   
tag_config_HIGHLIGHT_WORD_TAG_HIGHLIGHT_WORD_COLOUR_HIGHLIGHT_LABEL_TAG_HIGHLIGHT_LABEL_COLOURr   yviewxviewr	   re   )rS   rn   r   i1i2
vscrollbar
hscrollbarr#   r#   r$   ri   D  sD   
z'ConcordanceSearchView._init_results_boxc              	   C   s~   t || jd}t|d| jddddd | _}|jddd	 t|d
| jddddd | _}|jddd	 |jddd d| _d S )Nr   Previous10r\   r   )r   rt   r   rp   r   r   r   r   r   NextrightrI   r   )r   rb   r   )	r   re   r   previousprevrl   __next__nextcurrent_page)rS   rn   r   r   r   r#   r#   r$   rj   i  s.   		
z"ConcordanceSearchView._init_pagingc                 C   s&   |    |   | j| jd  d S Nr\   )clear_results_boxfreeze_editablerH   r   r   rR   r#   r#   r$   r        zConcordanceSearchView.previousc                 C   s&   |    |   | j| jd  d S r   )r   r   rH   r   r   rR   r#   r#   r$   r     r   zConcordanceSearchView.__next__c                 G   sJ   d}d}zddl m} |||| jd  W d S    t| j|| Y d S )NzNLTK Concordance Search Demo
z#About: NLTK Concordance Search Demor   )Message)messagerW   rn   )tkinter.messageboxr   rf   showr   rI   )rS   r   ABOUTTITLEr   r#   r#   r$   about  s   zConcordanceSearchView.aboutc                 C   sD   | j t| j | j t| j | j t| j | j t| j	 d S r(   )
rI   rX   CORPUS_LOADED_EVENThandle_corpus_loadedSEARCH_TERMINATED_EVENThandle_search_terminatedSEARCH_ERROR_EVENThandle_search_errorERROR_LOADING_CORPUS_EVENThandle_error_loading_corpusrR   r#   r#   r$   _bind_event_handlers  s   z*ConcordanceSearchView._bind_event_handlersc                 C   s   z	| j jdd}W n
 tjy   Y n(w |tkr| | n|tkr(| | n|tkr2| 	| n	|t
kr;| | | jt| j| _d S )NF)block)rF   r   rD   Emptyr   r   r   r   r  r  r  r  rI   rO   rP   rQ   r   r#   r#   r$   rQ     s   
zConcordanceSearchView._pollc                 C   s0   d| j   | jd< |   |   |   d S )NzError in loading r   )r   r   r   unfreeze_editable	clear_allr   r   r#   r#   r$   r    s   z1ConcordanceSearchView.handle_error_loading_corpusc                 C   s2   | j  d | jd< |   |   | j  d S )Nz
 is loadedr   )r   r   r   r  r	  r   	focus_setr   r#   r#   r$   r     s   z*ConcordanceSearchView.handle_corpus_loadedc                 C   sb   | j  }| | d| jd< t|dkrd| j j | jd< n| j j| _|   | j	
| j d S )N r   r   zNo results found for )rH   get_resultswrite_resultsr   lenquerylast_requested_pager   r  r   xview_moveto_FRACTION_LEFT_TEXT)rS   r   resultsr#   r#   r$   r     s   



z.ConcordanceSearchView.handle_search_terminatedc                 C   s   d| j j | jd< |   d S )NzError in query r   )rH   r  r   r  r   r#   r#   r$   r    s   z)ConcordanceSearchView.handle_search_errorc                 G   s   | j  }| | d S r(   )r   r   rM   )rS   argsnew_selectionr#   r#   r$   r     s   
z%ConcordanceSearchView.corpus_selectedc                 C   s:   | j j|krd| d | jd< |   | j | d S d S )NzLoading z...r   )rH   selected_corpusr   r   rM   )rS   	selectionr#   r#   r$   rM     s
   z!ConcordanceSearchView.load_corpusc                 C   sd   d| _ |   | j  | j }t| dkrd S d| | jd< | 	  | j
|| j d  d S )Nr   zSearching for r   r\   )r   r   rH   reset_resultsr   r   r  stripr   r   r   )rS   r  r#   r#   r$   r     s   

zConcordanceSearchView.searchc              
   C   sT  d| j d< d}|D ]}|d  |d |d }}}t|dkr|| jk r0| |||\}}}||| j || j  }|t|ksF|d7 }| j t|d | | |||\}}	|D ]}
| j 	| j
t|d t|
d  t|d t|
d   q\|	D ]}
| j 	| jt|d t|
d  t|d t|
d   q~|d7 }q	d	| j d< d S )
Nnormalr   r\   r   r   
z.0.r   )r   r  r  r   padr   insertstrwords_and_labelstag_addr   r   )rS   r  roweachsentpos1pos2sentenceword_markerslabel_markersmarkerr#   r#   r$   r    s6   
 
z#ConcordanceSearchView.write_resultsc                 C   s   ||| }g g }}| d}d}|D ]F}	|	dkr|d7 }n7|	 d\}
}|| j| | j| t|
 f |t|
d 7 }|| j| | j| t| f |t|7 }|d7 }q||fS )N r   r  r\   /)splitappendr   r  )rS   r'  r%  r&  
search_expwordslabelslabeled_wordsindexr#  wordrr   r#   r#   r$   r     s$   



z&ConcordanceSearchView.words_and_labelsc                 C   sD   || j kr
|||fS | j | }ddg| | }||| || fS )Nr  r+  )r   join)rS   r$  hstarthenddr#   r#   r$   r    s
   


zConcordanceSearchView.padc                 G   s0   | j d u rd S | j | j | j   d | _ d S r(   )rI   after_cancelrO   rY   )rS   r   r#   r#   r$   rY     s
   


zConcordanceSearchView.destroyc                 C   s$   | j dt | j  |   d S Nr   )r   deleter   rH   reset_queryr   rR   r#   r#   r$   r	    s   
zConcordanceSearchView.clear_allc                 C   s&   d| j d< | j dt d| j d< d S )Nr  r   z1.0r   )r   r;  r   rR   r#   r#   r$   r   !  s   
z'ConcordanceSearchView.clear_results_boxc                 C   s,   d| j d< d| jd< d| jd< d| jd< d S )Nr   r   )r   r   r   r   rR   r#   r#   r$   r   &  s   


z%ConcordanceSearchView.freeze_editablec                 C   s    d| j d< d| jd< |   d S )Nr  r   )r   r   set_paging_button_statesrR   r#   r#   r$   r  ,  s   

z'ConcordanceSearchView.unfreeze_editablec                 C   sT   | j dks
| j dkrd| jd< nd| jd< | j| j r#d| jd< d S d| jd< d S )Nr   r\   r   r   r  )r   r   rH   has_more_pagesr   rR   r#   r#   r$   r=  1  s   
z.ConcordanceSearchView.set_paging_button_statesc                 C   s   | j j|dd d S )Ntail)when)rI   event_generater   r#   r#   r$   
fire_event;  s   z ConcordanceSearchView.fire_eventc                 O   s    t  rd S | jj|i | d S r(   )r   rI   mainloop)rS   r  r   r#   r#   r$   rC  ?  s   zConcordanceSearchView.mainloopN)-__name__
__module____qualname__re   r   r   r   r   r  rT   rJ   rL   rK   r   r   r   rg   rk   rh   r   ri   rj   r   r   r   r  rQ   r  r   r   r  r   rM   r   r  r   r  rY   r	  r   r   r  r=  rB  rC  r#   r#   r#   r$   rA   v   sV    
_%

rA   c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd ZG dd dejZG dd dejZdS )rG   c                 C   s8   || _ t| _t| _d | _|   |   d | _d| _	d S r:  )
rF   _CORPORAr   _DEFAULTrN   r  r<  r  r   last_sent_searched)rS   rF   r#   r#   r$   rT   F  s   
zConcordanceSearchModel.__init__c                 C   s0   g }| t| j  || j |  |S r(   )extendr   r   r   r   rN   sort)rS   copyr#   r#   r$   r   P  s
   z*ConcordanceSearchModel.non_default_corporac                 C   s$   || _ g | _| || }|  d S r(   )r  r"   
LoadCorpusstart)rS   namerunner_threadr#   r#   r$   rM   W  s   z"ConcordanceSearchModel.load_corpusc                 C   s$   || _ || _| | || j  d S r(   )r  r  SearchCorpusr   rN  )rS   r  pager#   r#   r$   r   ]  s   zConcordanceSearchModel.searchc                 C   s6   || _ t| j|k r| | j| d S | jt d S r(   )r  r  r  r   r  rF   putr   rS   rR  r#   r#   r$   r   b  s   zConcordanceSearchModel.nextc                 C   s   || _ | jt d S r(   )r  rF   rS  r   rT  r#   r#   r$   r   i  s   zConcordanceSearchModel.prevc                 C   s   d| _ g | _d | _d S r:  )rI  r  	last_pagerR   r#   r#   r$   r  m  s   
z$ConcordanceSearchModel.reset_resultsc                 C   s
   d | _ d S r(   )r  rR   r#   r#   r$   r<  r  s   
z"ConcordanceSearchModel.reset_queryc                 C   s   | j |d | d S r   )r  r  )rS   rR  	resultsetr#   r#   r$   set_resultsu  s   z"ConcordanceSearchModel.set_resultsc                 C   s   | j | jd  S r   )r  r  rR   r#   r#   r$   r  x  r   z"ConcordanceSearchModel.get_resultsc                 C   s4   | j g ks| j d g krdS | jd u rdS || jk S )Nr   FT)r  rU  rT  r#   r#   r$   r>  {  s
   

z%ConcordanceSearchModel.has_more_pagesc                   @   s   e Zd Zdd Zdd ZdS )z!ConcordanceSearchModel.LoadCorpusc                 C   s   t j|  ||| _| _d S r(   )	threadingThreadrT   rH   rO  )rS   rO  rH   r#   r#   r$   rT     s   z*ConcordanceSearchModel.LoadCorpus.__init__c              
   C   st   z| j j| j  }dd |D | j _| j jt W d S  ty9 } zt| | j jt	 W Y d }~d S d }~ww )Nc                 S   s    g | ]}d  dd |D qS )r+  c                 s   s     | ]\}}|d  | V  qdS )r,  Nr#   ).0r   tr#   r#   r$   	<genexpr>  s    zCConcordanceSearchModel.LoadCorpus.run.<locals>.<listcomp>.<genexpr>)r5  )rZ  r$  r#   r#   r$   
<listcomp>  s    z9ConcordanceSearchModel.LoadCorpus.run.<locals>.<listcomp>)
rH   r   rO  r"   rF   rS  r   	Exceptionprintr  )rS   tsr   r#   r#   r$   run  s   
z%ConcordanceSearchModel.LoadCorpus.runN)rD  rE  rF  rT   ra  r#   r#   r#   r$   rM    s    rM  c                   @   s$   e Zd Zdd Zdd Zdd ZdS )z#ConcordanceSearchModel.SearchCorpusc                 C   s&   |||| _ | _| _tj|  d S r(   )rH   countrR  rX  rY  rT   )rS   rH   rR  rb  r#   r#   r$   rT     s   z,ConcordanceSearchModel.SearchCorpus.__init__c              	   C   s*  |   }g dd}}}| jj| jjd  D ]J}zt||}W n tjy8   | j  | jj	t
 Y  d S w |r\||| | f |d7 }|| jkr\| j j|d 7  _ n|d7 }q| jt|kr| j j|d 7  _| j| j_| j| j| n| j| j|d d  | jj	t d S )Nr   r\   )processed_queryrH   r"   rI  rer   errorr  rF   rS  r  r.  rN  endrb  r  rR  rU  rW  r   )rS   rD   sent_posi
sent_countr$  mr#   r#   r$   ra    s.   



z'ConcordanceSearchModel.SearchCorpus.runc                 C   s   g }| j j D ]8}tdd|}td|r%|tt d | t  qd|v r3|t| t  q|t| d t t  qd	|S )Nz\.z[^/ ]z[A-Z]+$r,  r+  )
rH   r  r-  re  submatchr.  BOUNDARYWORD_OR_TAGr5  )rS   newtermr#   r#   r$   rd    s   
z3ConcordanceSearchModel.SearchCorpus.processed_queryN)rD  rE  rF  rT   ra  rd  r#   r#   r#   r$   rQ    s    rQ  N)rD  rE  rF  rT   r   rM   r   r   r   r  r<  rW  r  r>  rX  rY  rM  rQ  r#   r#   r#   r$   rG   E  s    
rG   c                  C   s   t  } |   d S r(   )rA   rC  )r8  r#   r#   r$   app  s   rr  __main__)2rF   rD   re  rX  tkinterr   r   r   r   r   r   r   r	   r
   r   r   r   r   r   tkinter.fontr   nltk.corpusr   r   r   r   r   r   r   r   r   r   nltk.draw.utilr   	nltk.utilr   ro  rn  r   r   r  r  rP   rH  rG  rA   rG   rr  rD  __all__r#   r#   r#   r$   <module>   s   @0"#&'*<   Rx
