o
    ¹iþ  ã                   @   s,   d dl mZ d dlmZ G dd„ deƒZdS )é    )Úload)ÚStemmerIc                   @   s0   e Zd ZdZdd„ Zdd„ Zdd„ Zdd	„ Zd
S )ÚRSLPStemmerug  
    A stemmer for Portuguese.

        >>> from nltk.stem import RSLPStemmer
        >>> st = RSLPStemmer()
        >>> # opening lines of Erico Verissimo's "MÃºsica ao Longe"
        >>> text = '''
        ... Clarissa risca com giz no quadro-negro a paisagem que os alunos
        ... devem copiar . Uma casinha de porta e janela , em cima duma
        ... coxilha .'''
        >>> for token in text.split(): # doctest: +NORMALIZE_WHITESPACE
        ...     print(st.stem(token))
        clariss risc com giz no quadro-negr a pais que os alun dev copi .
        uma cas de port e janel , em cim dum coxilh .
    c                 C   sˆ   g | _ | j  |  d¡¡ | j  |  d¡¡ | j  |  d¡¡ | j  |  d¡¡ | j  |  d¡¡ | j  |  d¡¡ | j  |  d¡¡ d S )Nzstep0.ptzstep1.ptzstep2.ptzstep3.ptzstep4.ptzstep5.ptzstep6.pt)Ú_modelÚappendÚ	read_rule)Úself© r	   úI/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/nltk/stem/rslp.pyÚ__init__5   s   zRSLPStemmer.__init__c                 C   sÐ   t d| dd d¡}| d¡}dd„ |D ƒ}dd„ |D ƒ}d	d„ |D ƒ}g }|D ]<}g }| d
¡}| |d dd… ¡ | t|d ƒ¡ | |d dd… ¡ | dd„ |d  d¡D ƒ¡ | |¡ q)|S )Nznltk:stemmers/rslp/Úraw)ÚformatÚutf8Ú
c                 S   s   g | ]}|d kr|‘qS )Ú r	   ©Ú.0Úliner	   r	   r
   Ú
<listcomp>D   ó    z)RSLPStemmer.read_rule.<locals>.<listcomp>c                 S   s   g | ]
}|d  dkr|‘qS )r   ú#r	   r   r	   r	   r
   r   E   s    c                 S   s   g | ]}|  d d¡‘qS )z		ú	)Úreplacer   r	   r	   r
   r   H   r   r   r   é   éÿÿÿÿé   c                 S   s   g | ]}|d d… ‘qS )r   r   r	   )r   Útokenr	   r	   r
   r   Z   r   é   ú,)r   ÚdecodeÚsplitr   Úint)r   ÚfilenameÚrulesÚlinesr   ÚruleÚtokensr	   r	   r
   r   @   s   

zRSLPStemmer.read_rulec                 C   s   |  ¡ }|d dkr|  |d¡}|d dkr|  |d¡}|  |d¡}|  |d¡}|}|  |d¡}||krF|}|  |d	¡}||krF|  |d
¡}|S )Nr   Úsr   Úar   r   r   é   é   é   )ÚlowerÚ
apply_rule)r   ÚwordÚ	prev_wordr	   r	   r
   Ústema   s   zRSLPStemmer.stemc                 C   sv   | j | }|D ]1}t|d ƒ}|| d … |d kr8t|ƒ||d  kr8||d vr8|d | … |d  } |S q|S )Nr   r   r   r   )r   Úlen)r   r.   Ú
rule_indexr#   r%   Úsuffix_lengthr	   r	   r
   r-      s   
€ zRSLPStemmer.apply_ruleN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r0   r-   r	   r	   r	   r
   r   $   s    !r   N)Ú	nltk.datar   Únltk.stem.apir   r   r	   r	   r	   r
   Ú<module>   s   