o
    i{L                     @   s  d dl Z d dl mZ d dlmZ d dlZd dlmZmZm	Z	m
Z
mZmZmZmZ d dlmZ ejdddd	 Zejddd
d Zejdddd Zejdddd Zejdd Zejdddgdfddgd fdddgdd Zdd Zdd Zd d! Zd"d# Zejd$d% Zejdd&ddgdfd'dd(gd)d* Zejd+d, Z ejdddgd-fd.d/d0gd1d2 Z!d3d4 Z"ejd5d6 Z#ejdddgd-fddgd7fdd8dgd-fdd8dgd7fgd9d: Z$ejd;d< Z%ejdddgd=fd>d?d@gdAdB Z&dCdD Z'dEdF Z(ejdGdH Z)ejddIdJd(dd8gdKfddLd8gdMfddNd8gdKfgdOdP Z*ejdQdR Z+ejddSdJddd8gdTfddLd8gdUfddNd8gdTfgdVdW Z,ejdXdY Z-ejddIdJd(dd8gdZfddLd8gd[fddNd8gdZfgd\d] Z.ejd^d_ Z/ejddIdJd(dd8gd`fddLd8gdafddNd8gdbfgdcdd Z0ejdddedf Z1ejdgdd%d,d<dHdYdfej2d_ejj3dhdidjgejjdkg dled dmdndo Z4dpdq Z5drds Z6dtdu Z7dvdw Z8dxdy Z9dzd{ Z:d|d} Z;dS )~    N)fsum)
itemgetter)MLEAbsoluteDiscountingInterpolatedKneserNeyInterpolatedLaplaceLidstoneStupidBackoff
VocabularyWittenBellInterpolated)padded_everygramssession)scopec                   C   s   t g dddS )N)abcdz<s></s>   )
unk_cutoff)r
    r   r   X/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/nltk/test/unit/lm/test_models.py
vocabulary      r   c                   C   s   g dg dgS )N)r   r   r   r   )egr   r   r   r   r   r   r   r   r   training_data   r   r   c                 C      dd | D S )Nc                 S      g | ]	}t td |qS )   listr   .0sentr   r   r   
<listcomp>&       z(bigram_training_data.<locals>.<listcomp>r   r   r   r   r   bigram_training_data$      r*   c                 C   r   )Nc                 S   r    )   r"   r$   r   r   r   r'   +   r(   z)trigram_training_data.<locals>.<listcomp>r   r)   r   r   r   trigram_training_data)   r+   r-   c                 C   s   t d| d}|| |S Nr!   r   r   fit)r   r*   modelr   r   r   mle_bigram_model.      
r3   zword, context, expected_scorer   r   r   r   )r   Nr   )r   Ng$I$I?)yNg۶m۶m?c                 C       t | ||d|ksJ d S N-C6?pytestapproxscore)r3   wordcontextexpected_scorer   r   r   test_mle_bigram_scores5   s    r@   c                 C   s   t | ddgsJ d S )Nr   r   )mathisinflogscore)r3   r   r   r   'test_mle_bigram_logscore_for_zero_scoreH   s   rD   c                 C   H   g d}d}d}t | |d|ksJ t | |d|ks"J d S )N)r   r   )r   r   )r   <UNK>)rG   r   )r   r   r   r   g(\?g_vO@r8   r:   r;   entropy
perplexity)r3   trainedHrK   r   r   r   'test_mle_bigram_entropy_perplexity_seenL   s
   rN   c                 C   s4   g d}t | |sJ t | |sJ d S )N)rF   r   r   )r   r   rH   )rA   rB   rJ   rK   )r3   	untrainedr   r   r   )test_mle_bigram_entropy_perplexity_unseene   s   rQ   c                 C   sH   d}d}g d}t | |d|ksJ t | |d|ks"J d S )Ng~jt@gs @)r   r   r   )-r   rT   )r   r8   rI   )r3   rM   rK   textr   r   r   +test_mle_bigram_entropy_perplexity_unigramsm   s
   rX   c                 C      t d|d}||  |S Nr,   orderr   r0   r-   r   r2   r   r   r   mle_trigram_model   r4   r^   )r   )r   r   r   )r   Ngqq?)r5   NUUUUUU?c                 C   r6   r7   r9   )r^   r=   r>   r?   r   r   r   test_mle_trigram_scores   s    r`   c                 C      t dd|d}||  |S )N皙?r!   r[   r   r1   r*   r   r2   r   r   r   lidstone_bigram_model      
re   g88?)r   Ng"u)?)r   Ngк{?)r5   NgL?c                 C   r6   r7   r9   )re   r=   r>   r?   r   r   r   test_lidstone_bigram_score   s   rg   c                 C   rE   )NrF   rO   )r   rG   )rG   r   )r   r   )r   r   g=,Ԛ]@g鷯1@r8   rI   )re   rW   rM   rK   r   r   r    test_lidstone_entropy_perplexity   
   ri   c                 C   ra   )Nrb   r,   r[   rc   r]   r   r   r   lidstone_trigram_model   rf   rk   gqq?r   c                 C   r6   r7   r9   )rk   r=   r>   r?   r   r   r   test_lidstone_trigram_score   s   rl   c                 C   rY   r.   )r   r1   rd   r   r   r   laplace_bigram_model   r4   rm   gqq?)r   NgtE]t?)r   NgF]tE?)r5   NgF]tE?c                 C   r6   r7   r9   )rm   r=   r>   r?   r   r   r   test_laplace_bigram_score   s    rn   c                 C   rE   )Nrh   gQ	@gݓz!@r8   rI   )rm   rW   rM   rK   r   r   r   &test_laplace_bigram_entropy_perplexity  rj   ro   c                 C   s   | j dksJ d S )Nr   )gamma)rm   r   r   r   test_laplace_gamma5  s   rq   c                 C   rY   )Nr,   r/   )r   r1   r]   r   r   r   wittenbell_trigram_model9  r4   rr   )r   Ngqq?)r   Ng        grq?r   gqq?r   c                 C   r6   r7   r9   )rr   r=   r>   r?   r   r   r   test_wittenbell_trigram_score@  s   "rs   c                 C   ra   )Nr,   g      ?)r\   discountr   r   r1   r]   r   r   r   kneserney_trigram_modelr  rf   rv   )r   Ng$I$I?gm۶m?g$I$I?c                 C   r6   r7   r9   )rv   r=   r>   r?   r   r   r   test_kneserney_trigram_scorey  s   (rw   c                 C   rY   rZ   )r   r1   r]   r   r   r   "absolute_discounting_trigram_model  r4   rx   r_   g      ?c                 C   r6   r7   r9   )rx   r=   r>   r?   r   r   r   'test_absolute_discounting_trigram_score  s   %ry   c                 C   rY   rZ   )r	   r1   r]   r   r   r   stupid_backoff_trigram_model  r4   rz   g      ?      ?g?c                 C   r6   r7   r9   )rz   r=   r>   r?   r   r   r   !test_stupid_backoff_trigram_score  s   r|   c                 C   rY   )Nr!   r[   ru   rd   r   r   r   kneserney_bigram_model  r4   r}   model_fixturez*Stupid Backoff is not a valid distribution)reason)marksr>   )	rS   rT   rR   )r   rG   rV   )r   )r)w)idsc                    s<   | | t fddjD }t|ddksJ d S )Nc                 3   s    | ]	} | V  qd S N)r<   )r%   r   r>   r2   r   r   	<genexpr>!  s    z!test_sums_to_1.<locals>.<genexpr>gHz>r{   )getfixturevaluesumvocabr:   r;   )r~   r>   requestscores_for_contextr   r   r   test_sums_to_1  s   
r   c                 C   s   | j dddks
J d S )Nr,   random_seedrG   generater^   r   r   r   test_generate_one_no_context*  s   r   c                 C   sJ   | j dgddksJ | j ddgddksJ | j ddgddks#J d S )Nr   	text_seedr   r   r   r   r   r   r   r   'test_generate_one_from_limiting_context.  s   r   c                 C   s   | j ddddksJ d S )N)r   r   r!   r   r   r   r   r   r   r   r   %test_generate_one_from_varied_context5  s   r   c                 C   s<   t | jtdg}| | | jddddg dksJ d S )Nbdbdbd   )r   r      r   )r   r   r   r   r   r   r   )r   r\   r#   r1   r   )r^   more_training_textr   r   r   test_generate_cycle:  s   
 r   c                 C   s    | j ddddg dksJ d S )Nr   )r   r   r,   r   )rG   r   r   r   rG   r   r   r   r   r   test_generate_with_text_seedK  s    r   c                 C   s$   | j ddd| j dddksJ d S )N)aliensr,   r   r   r   r   r   r   r   test_generate_oov_text_seedU  s
   r   c                 C   sX   t t | jdd W d    n1 sw   Y  | jd dd| jddks*J d S )Nr   r   r,   r   r   )r:   raises	TypeErrorr   r   r   r   r   test_generate_None_text_seed[  s   
r   )<rA   r   r   operatorr   r:   nltk.lmr   r   r   r   r   r	   r
   r   nltk.lm.preprocessingr   fixturer   r   r*   r-   r3   markparametrizer@   rD   rN   rQ   rX   r^   r`   re   rg   ri   rk   rl   rm   rn   ro   rq   rr   rs   rv   rw   rx   ry   rz   r|   r}   paramxfailr   r   r   r   r   r   r   r   r   r   r   r   <module>   sD  (






























	
$	


!	





