o
    i                     @   s   d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZmZ ddlmZ ddlmZmZ ddlmZ ddlmZmZ dd	lmZ dd
lm Z  ddl!m"Z" G dd dZ#dd Z$e%dkrre$  dgZ&dS )zl
A graphical tool for exploring the regular expression based chunk
parser ``nltk.chunk.RegexpChunkParser``.
    N)
ButtonCanvasCheckbuttonFrameIntVarLabelMenu	ScrollbarTextTk)askopenfilenameasksaveasfilename)Font)
ChunkScoreRegexpChunkParser)RegexpChunkRule)	conll2000treebank_chunk)ShowText)Tree)in_idlec                   @   s  e Zd ZdZi dddddddd	d
dddddddddddddddddddddd d!d"d#i d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQZg dRZdSedTdUfdVedWdUfdXedYdZfd[ed\d]fd^ed\d]fd_ed`d`dafdbedcdddafdeedfdUfdgedhdUfdiedjdUfg
ZdkZ	 dlZ		 dmZ
	 dnZ	 doZ	 edpdqdrdrdkdsdtdudvZedldldrdrdwdkdsdtdudx	Zedydzd{d{dkdsdtdud|d}	Zed~dsdtdZedddZeddtdtddZed{d{dkdsdtdddZeddddZdZdrZedrdZZeddZZdZdd Z					dddZdd Zdd Zdd Zdd ZdZ dZ!dd Z"dZ#dd Z$dd Z%dd Z&dZ'dd Z(dd Z)dd Z*dd Z+dd Z,dd Z-dd Z.dd Z/dd Z0dddZ1dd Z2dd Z3dd Z4dd Z5dddÄZ6ddń Z7ddǄ Z8ddɄ Z9dZ:ddd̄Z;ddd΄Z<dddЄZ=dd҄ Z>dddԄZ?dddքZ@dd؄ ZAdS )RegexpChunkAppz
    A graphical tool for exploring the regular expression based chunk
    parser ``nltk.chunk.RegexpChunkParser``.

    See ``HELP`` for instructional text.
    CCzCoordinating conjunctionzPRP$zPossessive pronounCDzCardinal numberRBAdverbDT
DeterminerRBRzAdverb, comparativeEXzExistential thereRBSzAdverb, superlativeFWzForeign wordRPParticleJJ	AdjectiveTOtoJJRzAdjective, comparativeUHInterjectionJJSzAdjective, superlativeVBzVerb, base formLSzList item markerVBDzVerb, past tenseMDModalNNSzNoun, pluralNNzNoun, singular or massVBNzVerb, past participleVBZzVerb,3rd ps. sing. presentNNPzProper noun, singularNNPSzProper noun pluralWDTzwh-determinerPDTPredeterminerWPz
wh-pronounPOSzPossessive endingzWP$zPossessive wh-pronounPRPzPersonal pronounWRBz	wh-adverb(zopen parenthesis)zclose parenthesisz
open quotecommazclose quoteperiodzpound sign (currency marker)zdollar sign (currency marker)zPreposition/subord. conjunctionz#Symbol (mathematical or scientific)zVerb, gerund/present participlezVerb, non-3rd ps. sing. presentcolon)z``,z''.#$INSYMVBGVBP:))Help20a-  Welcome to the regular expression chunk-parser grammar editor.  You can use this editor to develop and test chunk parser grammars based on NLTK's RegexpChunkParser class.

Use this box ('Help') to learn more about the editor; click on the tabs for help on specific topics:<indent>
Rules: grammar rule types
Regexps: regular expression syntax
Tags: part of speech tags
</indent>
Use the upper-left box ('Grammar') to edit your grammar.  Each line of your grammar specifies a single 'rule', which performs an action such as creating a chunk or merging two chunks.

The lower-left box ('Development Set') runs your grammar on the development set, and displays the results.  Your grammar's chunks are <highlight>highlighted</highlight>, and the correct (gold standard) chunks are <underline>underlined</underline>.  If they match, they are displayed in <green>green</green>; otherwise, they are displayed in <red>red</red>.  The box displays a single sentence from the development set at a time; use the scrollbar or the next/previous buttons view additional sentences.

The lower-right box ('Evaluation') tracks the performance of your grammar on the development set.  The 'precision' axis indicates how many of your grammar's chunks are correct; and the 'recall' axis indicates how many of the gold standard chunks your system generated.  Typically, you should try to design a grammar that scores high on both metrics.  The exact precision and recall of the current grammar, as well as their harmonic mean (the 'f-score'), are displayed in the status bar at the bottom of the window.)Rules10a  <h1>{...regexp...}</h1><indent>
Chunk rule: creates new chunks from words matching regexp.</indent>

<h1>}...regexp...{</h1><indent>
Strip rule: removes words matching regexp from existing chunks.</indent>

<h1>...regexp1...}{...regexp2...</h1><indent>
Split rule: splits chunks that match regexp1 followed by regexp2 in two.</indent>

<h1>...regexp...{}...regexp...</h1><indent>
Merge rule: joins consecutive chunks that match regexp1 and regexp2</indent>
)Regexps10 60aZ  <h1>Pattern		Matches...</h1>
<hangindent>	<<var>T</var>>	a word with tag <var>T</var> (where <var>T</var> may be a regexp).
	<var>x</var>?	an optional <var>x</var>
	<var>x</var>+	a sequence of 1 or more <var>x</var>'s
	<var>x</var>*	a sequence of 0 or more <var>x</var>'s
	<var>x</var>|<var>y</var>	<var>x</var> or <var>y</var>
	.	matches any character
	(<var>x</var>)	Treats <var>x</var> as a group
	# <var>x...</var>	Treats <var>x...</var> (to the end of the line) as a comment
	\<var>C</var>	matches character <var>C</var> (useful when <var>C</var> is a special character like + or #)
</hangindent>
<h1>Examples:</h1>
<hangindent>	<regexp><NN></regexp>
		Matches <match>"cow/NN"</match>
		Matches <match>"green/NN"</match>
	<regexp><VB.*></regexp>
		Matches <match>"eating/VBG"</match>
		Matches <match>"ate/VBD"</match>
	<regexp><IN><DT><NN></regexp>
		Matches <match>"on/IN the/DT car/NN"</match>
	<regexp><RB>?<VBD></regexp>
		Matches <match>"ran/VBD"</match>
		Matches <match>"slowly/RB ate/VBD"</match>
\t<regexp><\#><CD> # This is a comment...</regexp>\n		Matches <match>"#/# 100/CD"</match>
</hangindent>)TagsrQ   zB<h1>Part of Speech Tags:</h1>
<hangindent><<TAGSET>></hangindent>
redz#a00
foregroundgreenz#080	highlightz#ddd
background	underlineT)rZ   h1indent   lmargin1lmargin2
hangindentr   <   varz#88fregexpz#ba7matchz#6a6      g?g{Gz?g{Gz?(      z#efegroove   word)widthheightrY   highlightbackgroundhighlightthicknessreliefborderwrapz#555)	rm   rn   rY   ro   rU   rp   rq   rr   rs   F   
   z#eef)   )	rm   rn   rY   ro   rp   rq   rr   rs   tabsz#9bb)rY   rq   rr   	helveticaifamilysizez#777   )rY   padxpadyrr   i,  i  )rY   ro   rp   rq   rr   rm   rn   )rY   activebackgroundro   z#aba   c                 C   sD   t dd|}t dd|}t dd|}| }t dd|}|S )	N((\\.|[^#])*)(#.*)?\1z + z\n\s+z\nz	([^\\])\$z\1\\$)resubstrip)selfgrammar r   S/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/nltk/app/chunkparser_app.pynormalize_grammar4  s   z RegexpChunkApp.normalize_grammarr   N NPc                 C   sz  || _ |du r
| j}|| _|du r*|dkrtd}n|dkr$t }ntd| d| _	 || _	 d| _		 d| _
	 || _	 || _	 d| _	 d| _	 g | _	 d| _	 d| _	 d| _	 d| _	 t|d| _	 t  }| _|d	 |d
 |d| j t|| _| jd | | |  | | !| | "| | j#$  |r| j#%d|d  | j#&dd | 'd | (  dS )a  
        :param devset_name: The name of the development set; used for
            display & for save files.  If either the name 'treebank'
            or the name 'conll2000' is used, and devset is None, then
            devset will be set automatically.
        :param devset: A list of chunked sentences
        :param grammar: The initial grammar to display.
        :param tagset: Dictionary from tags to string descriptions, used
            for the help page.  Defaults to ``self.TAGSET``.
        Nr   z	train.txttreebankzUnknown development set %sr   chunk_labelz+50+50zRegexp Chunk Parser Appz<Control-q>d   end
insert1.0))_chunk_labelTAGSETtagsetr   chunked_sentsr   
ValueErrorchunkerr   normalized_grammargrammar_changeddevsetdevset_namedevset_index_last_keypress_history_history_index_eval_grammar_eval_normalized_grammar_eval_indexr   _eval_scorer   topgeometrytitlebinddestroyr   _devset_sizeset_init_fonts_init_widgets_init_bindings_init_menubar
grammarboxfocusr   mark_setshow_devsetupdate)r   r   r   r   r   r   r   r   r   r   __init__?  sl   









zRegexpChunkApp.__init__c                    s   | d j | d j | d j | d j | d fdd | d fd	d  j d j  j d j  j d j  j d
 j d S )Nz<Control-n>z<Control-p>z<Control-t>z
<KeyPress>z<Control-s>c                          S N)save_grammarer   r   r   <lambda>      z/RegexpChunkApp._init_bindings.<locals>.<lambda>z<Control-o>c                    r   r   )load_grammarr   r   r   r   r     r   z<Configure>)r   _devset_next_devset_prevtoggle_show_tracer   r   evalbox
_eval_plotr   r   r   r   r   r     s   zRegexpChunkApp._init_bindingsc                 C   sR   t || _| jd td| j  d| _tdt| j d d  d| _d S )Nr]   rx   ry      )r   _sizer   r   get_fontint
_smallfontr   r   r   r   r     s   
zRegexpChunkApp._init_fontsc                 C   s  t |}t |dd}|jdd| jd |jddd| jd |jddd	| jd |jd
d| jd |jdd| jdd |jdd|d t |dd}|jd| j	dd| j
d |jd| j	dd| j
d |jd| j	dd| j
d |jd| j	dd| j
d |jd| j	dd| j
d |jdd|d t |dd}|jd| jd| jd  |jd!| jd"| jd  |jd#| jd$| jd  |jd%| jd&| jd  |jd'd|d t |dd}|jd(d| jd |jd)d|d |j|d* d S )+Nr   )tearoffzReset Application)labelrZ   commandzSave Current GrammarzCtrl-s)r   rZ   acceleratorr   zLoad GrammarzCtrl-ozSave Grammar History   Exitrf   zCtrl-q)r   rZ   r   r   File)r   rZ   menuTinyru   )r   variablerZ   valuer   Small   Mediumr]   Large   Huge"   Viewz50 sentences2   )r   r   r   r   z100 sentencesr   z200 sentences   z500 sentencesi  zDevelopment-SetAboutrL   )r   )r   add_commandresetr   r   save_historyr   add_cascadeadd_radiobuttonr   resizer   set_devset_sizeaboutconfig)r   parentmenubarfilemenuviewmenu
devsetmenuhelpmenur   r   r   r     s   
zRegexpChunkApp._init_menubarc                 G   s   | j r	|   dS |   dS )Nbreak)_showing_tracer   
show_tracer   r   r   r   r   r   #  s
   z RegexpChunkApp.toggle_show_trace   Fc                 O   s\  | d| j }| d| j }| jd | jjd|d d dddd	}| j|d d
 |d }}| jj||| d  |d dddd}d| j|d d }}	| jd }
| j| jj	dd|d d|
|
d | j| jj	d|	d dd|
|
d | j
  rt| jdkrd }}d }}tdtt| j| jd D ] }| j|  \}}}}t||}t||}t||}t||}qt|d d}t|d d}t|d d}t|d d}nd }}d }}tdD ]T}||| |d | ||    }|	|	| |d | ||    }||  k r|k r&n n| jj||||	dd ||  k r2|	k r?n q| jj||||dd q| j||||	 | j||	||	 | jj|d |	dddd|  d	 | jj|d |dddd|  d	 | jj||	d dddd|  d	 | jj||	d dddd|  d	 d  }}t| jD ]\}\}}}}||| || ||    }|	|	| || ||    }|| jkr| jj|d |d |d |d d d!d d"|d  d#|d   d$|d   | jd%< n| j| jj|d |d |d |d d&d'd |d ur%| j  r%| j| jj||||d'd ||}}qd S )(Nrm   rn   allru   rk   leftw	Precision)justifyanchortextr   sRecallcenter)r   r   r   rf   rY   r   i  )filloutlineg{Gz?   g      $@z#888)r  r|   rightsez%d%%r   nenwz#0f0z#000zPrecision: %.2f%%	zRecall: %.2f%%	zF-score: %.2f%%r   #afaz#8c8)r   r   winfo_widthwinfo_heightdeletecreate_textbbox_EVALBOX_PARAMSlowercreate_rectangle
_autoscalelenr   rangemin_SCALE_Nmaxcreate_line	enumerater   create_ovalstatus_eval_lines)r   r   r   rm   rn   tagr   r  r   botbgmax_precision
max_recallmin_precision
min_recallir   	precisionrecallfmeasurexyprev_xprev_y_fscorer   r   r   r   -  s   
 






	



zRegexpChunkApp._eval_plotc           	      C   s"  | j d u rd S | jd u rd| _d S t }t | j | jk r5| j| jkr5d| _| j t	| j
d | jS | j| jkr~| jD ]-\}}}}| j| |krk| j||||f t| jd | _|   d| _d | _ d S q>d| _t| jd| _| j| _| j| _| j dkrd| _d S | j| jt| j| j | j  D ]}| | }| j || q|  j| j7  _| j| j kr| j| j| j! | j" | j# f t| jd | _|   d| _d | _d S d| j | j  }d	| | j$d
< d| _| %t |  | j t	| j
d | j d S )NFTi  rf   r   r   r   r   z$Evaluating on Development Set (%d%%)r   )&r   r   _eval_demon_runningtimer   _EVAL_DELAYr   r   afterr   
_EVAL_FREQ_eval_demonr   r   appendr  r   r   r   r   r   r   r   r   r   r   r  _EVAL_CHUNKr   r   _chunkparseleavesscorer&  r'  	f_measurer  _adaptively_modify_eval_chunk)	r   t0gprfgoldguessprogressr   r   r   r4    sh   


zRegexpChunkApp._eval_demonc                 C   s   || j kr#| jdkr#t| jd tt| j| j |  | jd | _dS || jk rAt| jd tt| j| j|  | jd | _dS dS )z
        Modify _EVAL_CHUNK to try to keep the amount of time that the
        eval demon takes between _EVAL_DEMON_MIN and _EVAL_DEMON_MAX.

        :param t: The amount of time that the eval demon took.
        r   rf   ru   N)_EVAL_DEMON_MAXr6  r  r  r   _EVAL_DEMON_MIN)r   tr   r   r   r;    s"   


z,RegexpChunkApp._adaptively_modify_eval_chunkc                    s  t |fi  j}|jddd |jddd |jddd |jddd t|fd ji j _t| jd	d
 jd d _	 j	j
dddd  jj
dddd t| jjd}|j
dddd  jj|jd  jd }t ||d}|j
dddd t|fd jd jjdd t|fd jd jjdd t|fd ji j _ jj
dddd i  _ jd }t ||d}|j
dddd t jD ]<\}\}}	}
t|| jd}|j
|d ddd |d|f fdd	 | j|< t |d j|dj
|d d dd  qЈ j jd d  j jd!  jjd"d#d$  jD ]\}} jjd%| fi | q'  jd d  t| jjd} jj|jd |j
dddd t | jd d}t|fd ji j  _! j!jd#d&d' t| jd(d) j d d* _" j"j
dddd |j
dddd t| j#d _$ j$j
dddd t| j!j%d+d, _& j&j j!d-<  j&jd.d/d0  jd }t ||d}|j
dd1dd t|fd2 j'd jjdd t|fd3 j(d jjdd t|fd4 j)d5d6 j _* j*jd)d t|fd7 j+d j _, j,jd)d t-|fi  j. _/t| jd8d) j.d d*}|j
dddd  j/j
ddddd9  jd }t ||d}|j
dd1dd t0 j1 _2 j2d: t3|f j2 j4d;d< jjdd t0 j1 _5 j5d: t3|f j5 j4d=d< jjdd t|fd>d?i jjd)d t|fd ji j6 _7 j7j
dd@dAddddB d5 jdC< d5 j!dC<  jd }t |dDd|dj
ddd  t |ddD|dj
ddd  t |dEd|dj
ddFd  |jd&d#dG  j!jdHdIdJdK  j!jdLdJdMdN  j!jdOdPd  j!jdQdRdSdT  j!jdUdVdSdW  j!jdXdMdY  jjdXdZd  jjd[d\dY  jjd]d^dY  jjd_d`dY  jjdaddbdc d S )dNr      )weightr|   rk   rf   r   fontGrammar:blackrY   )rI  r   highlightcolorrY   SW)columnrowstickyNEWS)r   NWS)yscrollcommandrX   EWzPrev Grammar)r   r   r   )sidezNext Grammar)r   rI  Sz<ButtonPress>c                    s
     |S r   )	show_help)r   tabr   r   r   r   N  s   
 z.RegexpChunkApp._init_widgets.<locals>.<lambda>)rn   rm   rY   )rN  rO  )rI  elideT)rY  tag-%sboth)expandr  zDevelopment Set:r  )rI  r   r   rY   horiz)r   orientxscrollcommandbottomr)  )rU  r     zPrev Example (Ctrl-p)zNext Example (Ctrl-n)zShow exampledisabled)r   r   statez
Show tracezEvaluation:)rN  rO  rP  
columnspanFZoom)r   r   r   Linesr   History	   NEW)rN  rO  rP  r}   r~   rd  rc  ru   r      )r  r\  true-posr
  True)rY   rZ   	false-negz#800)rZ   rU   	false-posz#faatracez#666none)rU   rs   
wrapindentrv   )r`   rs   errorrT   z#feccommentz#840anglez#00fbracez#0a0ra   rh   r^   )8r   _FRAME_PARAMSgrid_columnconfiguregrid_rowconfigurer
   r   _GRAMMARBOX_PARAMSr   r   grammarlabelgridr	   yviewr   r   r   _history_prev_BUTTON_PARAMSpack_history_nextr   _HELPBOX_PARAMShelpboxhelptabsr  HELPr   _HELPTAB_SPACER	configure
tag_configHELP_AUTOTAGrW  _DEVSETBOX_PARAMS	devsetboxdevsetlabel_devset_scrolldevset_scrollxviewdevset_xscrollr   r   r   devset_buttonr   trace_buttonr   r  r   r   r   r  r   r   r  _STATUS_PARAMSr  )r   r   frame0grammar_scrollbarr   frame3helptab_framer%  rX  tabstopsr   r   r  paramshelp_scrollbarframe4frame1frame2r   r   r   r     sr  









zRegexpChunkApp._init_widgetsc                    sX  d| _ d| jd< d| jd< d| jd< | jdd d| jd | j f | jd	< | j	d u r>| j
dd
 | jddd d S | j| j }| j	 }d}dg t| D ]\}\}}|d| 7 } t| qT fddtt|d D | _dd tt|d D | _tt|d D ]}|dkr| j
dd | jddd n| j
dd||d    | jddd | j
d|d  | jddd t|d | }	| | }
| |}| |
}||D ]	}| ||d q|| D ]	}| ||d q|| D ]
}| ||d qq| j
dd | jddd | jd| jjdd d S )NTrb  rc  normalr   r   Development Set (%d/%d)rf   r   z#Trace: waiting for a valid grammar.rr  	z%s c                    s,   i | ]}t t D ]	}||f | q
qS r   )r  r  ).0r%  jcharnumr   r   
<dictcomp>  s    
z-RegexpChunkApp.show_trace.<locals>.<dictcomp>c                 S   s   i | ]	}||d  d  qS )rk   r   )r  r%  r   r   r   r    s    r   zStart:
ro  zend -2c linestartzend -2cz
Apply %s:
r   rq  rk  rm  rn  z
Finished.
r   g333333?)r   r  r  r  r  r   r   r   r  r   r   tag_addr   rulesr  r8  r5  r  r  r  linenumr   r7  _chunksintersection_color_chunkr   r2  r  r   )r   r   	gold_treer  tagseqwordnumrl   posr%  r   	test_treegold_chunkstest_chunkschunkr   r  r   r     sZ   








zRegexpChunkApp.show_tracec           
      C   sl  d| j d< | j dd | jD ]\}}}||kr|dddd tt| j d	d
 dD }| j	| j
di | j | j j
|d | j d|d  d}| jD ]N\}}d| d| d}t||D ]:}	| j d||	d ||	d  | j d| ||	d ||	d  | j d||	d ||	d  qeqRq| j	| j
di | j qd| j d< d S )Nr  rc  r   r   z
<<TAGSET>>r   c                 s       | ]}d | V  qdS )z	%s	%sNr   )r  itemr   r   r   	<genexpr>  s
    
z+RegexpChunkApp.show_help.<locals>.<genexpr>c                 S   s(   t d| d rd| d fpd| d fS )Nz\w+r   rf   )r   re   )t_wr   r   r   r   "  s    
z*RegexpChunkApp.show_help.<locals>.<lambda>)key)rw   z



















z1.0 + %d charsz(?s)(<z
>)(.*?)(</z>)rY  rf   rZ  rk   r|   rb  r   )r  r  r  replacejoinsortedlistr   itemsr  r   _HELPTAB_FG_PARAMSr   r  r   finditerr  startr   _HELPTAB_BG_PARAMS)
r   rX  namer  r   Cr  r  patternmr   r   r   rW    s:   

$&	zRegexpChunkApp.show_helpc                 G   s   |  | jd  dS Nrf   r   _view_historyr   r   r   r   r   r}  9     zRegexpChunkApp._history_prevc                 G   s   |  | jd  dS r  r  r   r   r   r   r  =  r  zRegexpChunkApp._history_nextc                 C   s.  t dtt| jd |}| jsd S || jkrd S d| jd< | jdd | jd| j| d  | jdd || _| 	| j| d  | 
| j| d | _| jr_dd	 | jd
D }ng }t|| _|   |   | jru|   | jt| jd k rd| jd t| j| jd< d S d| jd< d S )Nr   rf   r  rc  r   r   r   c                 S      g | ]}t |qS r   r   
fromstringr  liner   r   r   
<listcomp>T      z0RegexpChunkApp._view_history.<locals>.<listcomp>r   zGrammar {}/{}:r   rJ  )r  r  r  r   r   r   r  r   r   _syntax_highlight_grammarr   r   splitr   r   r   _highlight_devsetr   r   formatrz  )r   indexr  r   r   r   r  A  s8   



zRegexpChunkApp._view_historyc                 G      |  ddd dS )Nscrollrf   pager   r  r   r   r   r   r   j     zRegexpChunkApp._devset_nextc                 G   r  )Nr  r   r  r   r  r   r   r   r   r   n  r  zRegexpChunkApp._devset_prevc                 G   s"   | j d u rd S | j   d | _ d S r   )r   r   r   r   r   r   r   r  s   


zRegexpChunkApp.destroyc                 G   s   d}| j }|dkr|d dr| | jt|d   n9|dkr7|d dr7| | j|t|d    n|dkrL| tt|d | j   n
J d| d| |r^|   d S d S )	Nrf   r  unitr   r  movetozbad scroll command r   )	r   
startswithr   r   r   floatr   r   r   )r   r   argsNshowing_tracer   r   r   r  x  s   "zRegexpChunkApp._devset_scrollc                 C   s  |d u r| j }ttd|| j d }|| j kr| jsd S || _ d| _d| jd< d| jd< d| jd< d| jd< | j	d	d
 d| j d | j f | j
d< | j| j | j d  }i | _ddi| _t|D ]>\}}d}t| D ]$\}\}}t|| j||f< || d| d7 }t|| j||d f< qq| jd
|d d d  qe| jd ur|   d| jd< | j | j  }	| j d | j  }
| j|	|
 d S )Nr   rf   Fr  rc  rb  rl   rs   r   r   r  r   r   /r   r   z

rk   )r   r  r  r   r   r   r  r  r  r  r  r   r  r  r  r8  r  r   r   r  r  r   )r   r  samplesentnumsentlinestrr  rl   r  firstlastr   r   r   r     s>   







zRegexpChunkApp.show_devsetc                 C   s\   t  }d}|D ]$}t|tr'| | jkr |||t| f |t|7 }q|d7 }q|S )Nr   rf   )r   
isinstancer   r   r   addr  )r   treechunksr  childr   r   r   r    s   

zRegexpChunkApp._chunksc                 C   sT  | j d u rd S | jddd | jddd | jddd | jddd t|dD ]y\}}| s7q.td|}d }|	d	rg|
d	}d
|d |
d	f }d
|d |d	f }| jd|| td|D ]9}|d ur{|
 |kr{ n,d
|d |
 f }d
|d | f }|	 dv r| jd|| qm| jd|| qmq.d S )Nrs  r   r   rt  ru  ra   r   z(\\.|[^#])*(#.*)?rk   z%d.%drf   z[<>{}]z<>)r   r   
tag_remover  r  r  r   r   re   groupr  r   r  )r   r   linenor  r  comment_startr   r   r   r   r   r    s4   


z(RegexpChunkApp._syntax_highlight_grammarc                 C   s   | j d u rd S | jddd g | _t|dD ];\}}tdd|}| }|rTzt	
| W q tyS } z| jdd|d  d	|d   W Y d }~qd }~ww qd
| jd< d S )Nrr  r   r   r   r   r   z%s.0rf   z%s.0 lineendr   r   )r   r   r  _grammarcheck_errsr  r  r   r   r   r   r  r   r  r  )r   r   r  r  r   r   r   r   _grammarcheck  s$   
zRegexpChunkApp._grammarcheckc              
   G   s  |rt   | _| jdd | _}| |}|| jkrd S || _| jt| j	d k r/d| j
d< | | z|rBdd |dD }ng }W n ty` } z| | d | _W Y d }~d S d }~ww t|| _| jd	dd t   | _| jr{|   n|   | js|   d S d S )
Nr   r   rf   rJ  r   c                 S   r  r   r  r  r   r   r   r    r  z)RegexpChunkApp.update.<locals>.<listcomp>r   rr  )r0  r   r   r   r   r   r   r   r  r   rz  r  r  r   r  r   r   r  r   r   r   r  r/  r4  )r   eventr   r   r  r   r   r   r   r     s@   








zRegexpChunkApp.updatec                 C   s   |d u r| j | j| jd  }| jddd | jddd | jddd t|D ]@\}}| | }| |}| |}||D ]	}| 	||d qE|| D ]	}| 	||d qS|| D ]	}| 	||d qaq+d S )Nrf   rk  r   r   rm  rn  )
r   r   r  r  r  r7  r8  r  r  r  )r   r  r  r  r  r  r  r  r   r   r   r    s"   

z RegexpChunkApp._highlight_devsetc              
   C   sL   z| j |W S  ttfy% } z| jddd |W  Y d }~S d }~ww )Nrr  r   r   )r   parser   
IndexErrorr   r  )r   wordsr   r   r   r   r7  5  s   zRegexpChunkApp._chunkparsec              	   C   sT   |\}}| j || j|  d| j||f  | j|  d| j||f d   d S )NrD   rf   )r  r  r  r  )r   r  r  r  r  r   r   r   r   r  @  s    zRegexpChunkApp._color_chunkc                 C   sH   d | _ d | _d | _d| _g | _d| _| jdd | d | 	  d S )Nr   r   r   )
r   r   r   r   r   r   r   r  r   r   r   r   r   r   r   H  s   
zRegexpChunkApp.resetz# Regexp Chunk Parsing Grammar
# Saved %(date)s
#
# Development set: %(devset)s
#   Precision: %(precision)s
#   Recall:    %(recall)s
#   F-score:   %(fscore)s

%(grammar)s
c                 C   s   |sddg}t |dd}|sd S | jr2| j| | jd d kr2dd | jd d	d  D \}}}n| jd u r>d
 } }}nd } }}t|d}|| jtt	
 | j|||| j d  W d    d S 1 skw   Y  d S )NzChunk Gramamr.chunkz	All files*r  	filetypesdefaultextensionr   r   c                 s   s    | ]	}d d|  V  qdS )z%.2f%%r   Nr   )r  vr   r   r   r  j  s    
z.RegexpChunkApp.save_grammar.<locals>.<genexpr>rf   zGrammar not well formedzNot finished evaluation yetr   )dater   r&  r'  r.  r   )r   r   r   r   r   openwriteSAVE_GRAMMAR_TEMPLATEdictr0  ctimer   r   r   )r   filenameftypesr&  r'  r.  outfiler   r   r   r   a  s8   
"zRegexpChunkApp.save_grammarc                 C   s   |sddg}t |dd}|sd S | jdd |   t|}| }W d    n1 s.w   Y  tdd| }| j	d| |   d S )	Nr  r  r  r  r   r   z2^\# Regexp Chunk Parsing Grammar[\s\S]*F-score:.*
r   )
r   r   r  r   r   readr   r   lstripr   )r   r  r  infiler   r   r   r   r     s"   

zRegexpChunkApp.load_grammarc           
   	   C   sd  |sddg}t |dd}|sd S t|d}|d |dt   |d| j  t| jD ]4\}\}}}}d	|d
 t| j|d |d |d f }	|d|	  |d	dd |
  D  q1| jrv| j| | jd d ks| jd u r|d n|d |d	dd | j
  D  W d    d S W d    d S 1 sw   Y  d S )N)zChunk Gramamr History.txtr  r  r  r   z'# Regexp Chunk Parsing Grammar History
z# Saved %s
z# Development set: %s
z>Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, fscore=%.2f%%)rf   r   z
%s
r   c                 s   r  z  %s
Nr   r  r   r   r   r        z.RegexpChunkApp.save_history.<locals>.<genexpr>r   r   z#
Current Grammar (not well-formed)
z!
Current Grammar (not evaluated)
c                 s   r  r  r   r  r   r   r   r    r  )r   r   r  r0  r  r   r  r   r  r  r   r  r   r   r   r   )
r   r  r  r  r%  r=  r>  r?  r@  hdrr   r   r   r     s<   
"$

"zRegexpChunkApp.save_historyc                 G   sF   d}d}zddl m} |||d  W d S    t| j|| Y d S )Nz<NLTK RegExp Chunk Parser Application
Written by Edward Loperz2About: Regular Expression Chunk Parser Applicationr   )Message)messager   )tkinter.messageboxr  showr   r   )r   r   ABOUTTITLEr  r   r   r   r     s   zRegexpChunkApp.aboutc                 C   sJ   |d ur
| j | | j tt| j| j   | d | d d S )Nrf   r   )r   r   r  r  r   r   r   r   r{   r   r   r   r     s
   
zRegexpChunkApp.set_devset_sizec                 C   sX   |d ur
| j | | j  }| jjt| d | jjtdt| d d d d S )N)r{   ir   r]   )r   r   r   r   r  absr   r  r  r   r   r   r     s
   
&zRegexpChunkApp.resizec                 O   s    t  rdS | jj|i | dS )z
        Enter the Tkinter mainloop.  This function must be called if
        this demo is created from a non-interactive program (e.g.
        from a secript); otherwise, the demo will close as soon as
        the script completes.
        N)r   r   mainloop)r   r  kwargsr   r   r   r    s   zRegexpChunkApp.mainloop)r   Nr   r   Nr   )B__name__
__module____qualname____doc__r   r  r  r  r1  r6  r3  rE  rD  ry  r  r  r  _FONT_PARAMSrv  r  r~  _HELPTAB_BG_COLOR_HELPTAB_FG_COLORr  r  r  r   r   r   r   r   r   r  _DRAW_LINESr   r/  r4  r;  r   r   r   rW  r}  r  r  r   r   r   r  r   r  r  r  r   r  r7  r  r   r  r   r   r   r   r   r   r  r   r   r   r   r   -   s   	
 !"#7h
	


k	b K >?")
-
3


!


r   c                   C   s   t    d S r   )r   r  r   r   r   r   app  s   r!  __main__)'r  randomr   textwrapr0  tkinterr   r   r   r   r   r   r   r	   r
   r   tkinter.filedialogr   r   tkinter.fontr   
nltk.chunkr   r   nltk.chunk.regexpr   nltk.corpusr   r   nltk.draw.utilr   	nltk.treer   	nltk.utilr   r   r!  r  __all__r   r   r   r   <module>   s<   
0           3
