o
    iG                     @   sZ   d dl Z d dlmZ d dlmZ d dlmZ G dd deZdd Ze	d	kr+e  dS dS )
    N)defaultdict)reduce)CorpusReaderc                       s^   e Zd ZdZedZedd Zd fdd	Z	dd	d
Z
dddZdddZdd Z  ZS )LinThesaurusCorpusReaderzEWrapper for the LISP-formatted thesauruses distributed by Dekang Lin.z \("?([^"]+)"? \(desc [0-9.]+\).+c                   C   s   t tS )z6Factory for creating defaultdict of defaultdict(dict)s)r   dict r   r   Q/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/nltk/corpus/reader/lin.py__defaultdict_factory   s   z.LinThesaurusCorpusReader.__defaultdict_factory        c              	      s   t  |d ttj| _|| _| jdddD ]T\}}}t|C}d}|D ]6}|	 }|r7tj
d|}	d}q%|dkr>d}q%|d}
t|
dkr[|
\}}t|| j| |	 |	d	< q%W d
   n1 sfw   Y  qd
S )a  
        Initialize the thesaurus.

        :param root: root directory containing thesaurus LISP files
        :type root: C{string}
        :param badscore: the score to give to words which do not appear in each other's sets of synonyms
        :type badscore: C{float}
        zsim[A-Z]\.lspT)include_encodinginclude_fileidz\1Fz))	   "N)super__init__r   r   ._LinThesaurusCorpusReader__defaultdict_factory
_thesaurus	_badscoreabspathsopenstrip_key_resubsplitlenfloat)selfrootbadscorepathencodingfileidlin_filefirstlinekey
split_linengramscore	__class__r   r   r      s4   


z!LinThesaurusCorpusReader.__init__Nc                    sf    kr|rdS dd j D S |r'j|   v r$j|    S jS  fddj D S )a  
        Returns the similarity score for two ngrams.

        :param ngram1: first ngram to compare
        :type ngram1: C{string}
        :param ngram2: second ngram to compare
        :type ngram2: C{string}
        :param fileid: thesaurus fileid to search in. If None, search all fileids.
        :type fileid: C{string}
        :return: If fileid is specified, just the score for the two ngrams; otherwise,
                 list of tuples of fileids and scores.
              ?c                 S   s   g | ]}|d fqS )r,   r   .0fidr   r   r   
<listcomp>Q   s    z7LinThesaurusCorpusReader.similarity.<locals>.<listcomp>c                    s:   g | ]}|j |   v rj |    njfqS r   )r   r   r-   ngram1ngram2r   r   r   r0   Z   s    	)_fileidsr   r   )r   r2   r3   r"   r   r1   r   
similarity?   s   	z#LinThesaurusCorpusReader.similarityc                    ,   |rj |    S  fddjD S )a   
        Returns a list of scored synonyms (tuples of synonyms and scores) for the current ngram

        :param ngram: ngram to lookup
        :type ngram: C{string}
        :param fileid: thesaurus fileid to search in. If None, search all fileids.
        :type fileid: C{string}
        :return: If fileid is specified, list of tuples of scores and synonyms; otherwise,
                 list of tuples of fileids and lists, where inner lists consist of tuples of
                 scores and synonyms.
        c                    "   g | ]}|j |    fqS r   )r   itemsr.   r"   r(   r   r   r   r0   u       z<LinThesaurusCorpusReader.scored_synonyms.<locals>.<listcomp>)r   r8   r4   r   r(   r"   r   r:   r   scored_synonymsf   s
   z(LinThesaurusCorpusReader.scored_synonymsc                    r6   )a  
        Returns a list of synonyms for the current ngram.

        :param ngram: ngram to lookup
        :type ngram: C{string}
        :param fileid: thesaurus fileid to search in. If None, search all fileids.
        :type fileid: C{string}
        :return: If fileid is specified, list of synonyms; otherwise, list of tuples of fileids and
                 lists, where inner lists contain synonyms.
        c                    r7   r   )r   keysr9   r:   r   r   r0      r;   z5LinThesaurusCorpusReader.synonyms.<locals>.<listcomp>)r   r>   r4   r<   r   r:   r   synonymsz   s
   z!LinThesaurusCorpusReader.synonymsc                    s   t  fddjdS )z
        Determines whether or not the given ngram is in the thesaurus.

        :param ngram: ngram to lookup
        :type ngram: C{string}
        :return: whether the given ngram is in the thesaurus.
        c                    s   | p j | v S N)r   )accumr"   r:   r   r   <lambda>   s    z7LinThesaurusCorpusReader.__contains__.<locals>.<lambda>F)r   r4   )r   r(   r   r:   r   __contains__   s
   z%LinThesaurusCorpusReader.__contains__)r
   r@   )__name__
__module____qualname____doc__recompiler   staticmethodr   r   r5   r=   r?   rC   __classcell__r   r   r*   r   r      s    


$
'
r   c                  C   s   ddl m}  d}d}td|  t| | td|  t| | td|  t| j|dd	 td|  t| j|dd	 td
| d| d t| || d S )Nr   )lin_thesaurusbusiness
enterprisezGetting synonyms for zGetting scored synonyms for z5Getting synonyms from simN.lsp (noun subsection) for zsimN.lsp)r"   zSimilarity score for z and :)nltk.corpusrL   printr?   r=   r5   )thesword1word2r   r   r   demo   s   rU   __main__)
rH   collectionsr   	functoolsr   nltk.corpus.readerr   r   rU   rD   r   r   r   r   <module>   s    
