o
    iR                     @   sj   d Z ddlZddlZddlZddlmZ ddlZddlm	Z	 G dd dZ
G dd dZG d	d
 d
ZdS )a  
If you use the VADER sentiment analysis tools, please cite:

Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for
Sentiment Analysis of Social Media Text. Eighth International Conference on
Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
    N)product)pairwisec                   @   s  e Zd ZdZdZdZdZdZh dZi deded	ed
edededededededededededededei dedededededededed ed!ed"ed#ed$ed%ed&ed'ed(ei d)ed*ed+ed,ed-ed.ed/ed0ed1ed2ed3ed4ed5ed6ed7ed8ed9eeeeeeeeeeeeeeeed:Z	d;d;d<d=d>d?d=d@Z
edAeej dBZg dCZdDdE ZdOdGdHZdPdJdKZdLdM ZdNS )QVaderConstantsz8
    A class to keep the Vader lists and constants.
    gn?gnҿg~jt?gGz>;   ain'tcan'tdon'tisn'tuh-uhwon'taren'tdidn'thadn'thasn'tshan'twasn'tdaren'tdoesn'thaven'tmustn'tneedn'tweren'tcouldn'tmightn'toughtn'twouldn't	shouldn'tnornotaintcantdontisntnonenopeuhuhwontarentdidnthadnthasntnevershantwasntcannotdarentdoesnthaventmustntneedntrarelyseldomwerentcouldntdespitemightntneithernothingnowhereoughtntwithoutwouldntshouldnt
absolutely	amazinglyawfully
completelyconsiderably	decidedlydeeplyeffing
enormouslyentirely
especiallyexceptionally	extremely
fabulouslyflippingflippinfrickingfrickinfriggingfrigginfullyfuckinggreatlyhellahighlyhugely
incredibly	intenselymajorlymoremostparticularlypurelyquitereally
remarkablysosubstantially
thoroughlytotallytremendouslyuberunbelievably	unusuallyutterlyveryalmostbarelyhardlyzjust enoughzkind of)kindakindofzkind-oflesslittle
marginallyoccasionallypartlyscarcelyslightlysomewhatzsort ofsortasortofzsort-of         ?   g      )zthe shitzthe bombzbad assz
yeah rightzcut the mustardzkiss of deathzhand to mouth[]).!?,;:-'"z!!z!!!z??z???z?!?z!?!z?!?!z!?!?c                 C   s   d S N )selfr   r   O/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/nltk/sentiment/vader.py__init__   s   zVaderConstants.__init__Tc                    sn   | j  t fdd|D rdS |rtdd |D rdS t|D ]\}}| dkr4| dkr4 dS q!dS )z<
        Determine if input contains negation words
        c                 3   s    | ]	}|   v V  qd S r   lower.0word	neg_wordsr   r   	<genexpr>       z)VaderConstants.negated.<locals>.<genexpr>Tc                 s   s    | ]	}d |  v V  qdS )zn'tNr   r   r   r   r   r      r   leastatF)NEGATEanyr   r   )r   input_words
include_ntfirstsecondr   r   r   negated   s   zVaderConstants.negated   c                 C   s   |t || |  }|S )z|
        Normalize the score to be between -1 and 1 using an alpha that
        approximates the max expected value
        )mathsqrt)r   scorealpha
norm_scorer   r   r   	normalize   s   zVaderConstants.normalizec                 C   s`   d}|  }|| jv r.| j| }|dk r|d9 }| r.|r.|dkr)|| j7 }|S || j8 }|S )zh
        Check if the preceding words increase, decrease, or negate/nullify the
        valence
                r   )r   BOOSTER_DICTisupperC_INCR)r   r   valenceis_cap_diffscalar
word_lowerr   r   r   scalar_inc_dec   s   



zVaderConstants.scalar_inc_decN)T)r   )__name__
__module____qualname____doc__B_INCRB_DECRr   N_SCALARr   r   SPECIAL_CASE_IDIOMSrecompileescapestringpunctuationREGEX_REMOVE_PUNCTUATION	PUNC_LISTr   r   r   r   r   r   r   r   r   !   s   A	
 !"#$%&'()*+,-./01234G

r   c                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )	SentiTextzL
    Identify sentiment-relevant string-level properties of input text.
    c                 C   sF   t |tst|d}|| _|| _|| _|  | _| | j| _	d S )Nzutf-8)

isinstancestrencodetextr   r   _words_and_emoticonswords_and_emoticonsallcap_differentialr   )r   r   	punc_listregex_remove_punctuationr   r   r   r     s   

zSentiText.__init__c                 C   sd   | j d| j}| }dd |D }dd t| j|D }dd t|| jD }|}|| |S )zt
        Returns mapping of form:
        {
            'cat,': 'cat',
            ',cat': 'cat',
        }
         c                 S   s   h | ]
}t |d kr|qS    len)r   wr   r   r   	<setcomp>#      z-SentiText._words_plus_punc.<locals>.<setcomp>c                 S      i | ]
}d  ||d qS )r   r   joinr   pr   r   r   
<dictcomp>%  r   z.SentiText._words_plus_punc.<locals>.<dictcomp>c                 S   r   )r   r   r   r   r   r   r   r   &  r   )r   subr   splitr   r   update)r   no_punc_text
words_onlypunc_before
punc_afterwords_punc_dictr   r   r   _words_plus_punc  s   
zSentiText._words_plus_puncc                 C   sJ   | j  }|  }dd |D }t|D ]\}}||v r"|| ||< q|S )z
        Removes leading and trailing puncutation
        Leaves contractions and most emoticons
            Does not preserve punc-plus-letter emoticons (e.g. :D)
        c                 S   s   g | ]
}t |d kr|qS r   r   )r   wer   r   r   
<listcomp>3  r   z2SentiText._words_and_emoticons.<locals>.<listcomp>)r   r   r   	enumerate)r   wesr   ir   r   r   r   r   +  s   
zSentiText._words_and_emoticonsc                 C   sT   d}d}|D ]
}|  r|d7 }qt|| }d|  k r#t|k r(n |S d}|S )z
        Check whether just some words in the input are ALL CAPS

        :param list words: The words to inspect
        :returns: `True` if some but not all items in `words` are ALL CAPS
        Fr   r   T)r   r   )r   wordsis_differentallcap_wordsr   cap_differentialr   r   r   r   9  s   zSentiText.allcap_differentialN)r   r   r   r   r   r   r   r   r   r   r   r   r     s    r   c                   @   s|   e Zd ZdZ	dddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd ZdS )SentimentIntensityAnalyzerz8
    Give a sentiment intensity score to sentences.
    ;sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txtc                 C   s$   t j|| _|  | _t | _d S r   )nltkdataloadlexicon_filemake_lex_dictlexiconr   	constants)r   r   r   r   r   r   P  s   
z#SentimentIntensityAnalyzer.__init__c                 C   s@   i }| j dD ]}| ddd \}}t|||< q|S )z6
        Convert lexicon file to a dictionary
        
	r   r   )r   r   stripfloat)r   lex_dictliner   measurer   r   r   r   X  s
   z(SentimentIntensityAnalyzer.make_lex_dictc                 C   s   t || jj| jj}g }|j}|D ]8}d}||}|t|d k r2| dkr2||d   dks:| | jjv r@|	| q| 
|||||}q| ||}| ||S )a  
        Return a float for sentiment strength based on the input text.
        Positive values are positive valence, negative value are negative
        valence.

        :note: Hashtags are not taken into consideration (e.g. #BAD is neutral). If you
            are interested in processing the text in the hashtags too, then we recommend
            preprocessing your data to remove the #, after which the hashtag text may be
            matched as if it was a normal word in the sentence.
        r   r   kindof)r   r   r   r   r   indexr   r   r   appendsentiment_valence
_but_checkscore_valence)r   r   	sentitext
sentimentsr   itemr   r   r   r   r   polarity_scoresb  s"   

z*SentimentIntensityAnalyzer.polarity_scoresc                 C   s  |j }|j}| }|| jv r| j| }| r+|r+|dkr%|| jj7 }n|| jj8 }tddD ]P}	||	kr|||	d    | jvr| j|||	d   ||}
|	dkr]|
dkr]|
d }
|	dkri|
dkri|
d }
||
 }| 	|||	|}|	dkr| 
|||}q0| |||}|| |S )Nr   r}   r   gffffff?r   g?)r   r   r   r   r   r   r   ranger   _never_check_idioms_check_least_checkr   )r   r   r  r  r   r  r   r   item_lowercasestart_isr   r   r   r    s>   

	
z,SentimentIntensityAnalyzer.sentiment_valencec                 C   s   |dkr5||d    | jvr5||d    dkr5||d    dkr3||d    dkr3|| jj }|S |dkrT||d    | jvrT||d    dkrT|| jj }|S )Nr   r   r   r   rm   r   )r   r   r   r   )r   r   r   r   r   r   r   r    s   z'SentimentIntensityAnalyzer._least_checkc                 C   sr   dd |D }dht |@ }|r7|tt|}t|D ]\}}||k r,|d ||< q||kr6|d ||< q|S )Nc                 S   s   g | ]}|  qS r   r   )r   w_er   r   r   r     s    z9SentimentIntensityAnalyzer._but_check.<locals>.<listcomp>butg      ?r~   )setr   nextiterr   )r   r   r  r  bisidx	sentimentr   r   r   r    s   z%SentimentIntensityAnalyzer._but_checkc                 C   s  ||d   d||  }d ||d  ||d  || }||d   d||d   }d ||d  ||d  ||d  }d ||d  ||d  }|||||g}	|	D ]}
|
| jjv rg| jj|
 } nqWt|d |kr||  d||d   }|| jjv r| jj| }t|d |d krd || ||d  ||d  }|| jjv r| jj| }|| jjv s|| jjv r|| jj }|S )Nr    z{} {} {}r   r}   z{} {})formatr   r   r   r   r   )r   r   r   r   onezero
twoonezerotwoonethreetwoonethreetwo	sequencesseqzeroone
zeroonetwor   r   r   r
    sJ   






z(SentimentIntensityAnalyzer._idioms_checkc                 C   s   |dkr| j ||d  gr|| j j }|dkrI||d  dkr6||d  dks1||d  dkr6|d }n| j |||d   grI|| j j }|dkr||d  dkre||d  dksu||d  dksu||d  dksu||d  dkr{|d	 }|S | j |||d   gr|| j j }|S )
Nr   r   r   r*   rd   thisr~   r}   g      ?)r   r   r   )r   r   r   r  r   r   r   r   r	    s*   
z'SentimentIntensityAnalyzer._never_checkc                 C   s    |  |}| |}|| }|S r   )_amplify_ep_amplify_qm)r   sum_sr   ep_amplifierqm_amplifierpunct_emph_amplifierr   r   r   _punctuation_emphasis$  s   

z0SentimentIntensityAnalyzer._punctuation_emphasisc                 C   s"   | d}|dkrd}|d }|S )Nr      g㥛 ?count)r   r   ep_countr&  r   r   r   r#  +  s
   
z&SentimentIntensityAnalyzer._amplify_epc                 C   s2   | d}d}|dkr|dkr|d }|S d}|S )Nr   r   r   r}   g
ףp=
?gQ?r+  )r   r   qm_countr'  r   r   r   r$  5  s   
z&SentimentIntensityAnalyzer._amplify_qmc                 C   s`   d}d}d}|D ]"}|dkr|t |d 7 }|dk r"|t |d 7 }|dkr*|d7 }q|||fS )Nr   r   r   )r   )r   r  pos_sumneg_sum	neu_countsentiment_scorer   r   r   _sift_sentiment_scoresB  s    


z1SentimentIntensityAnalyzer._sift_sentiment_scoresc                 C   s   |rct t|}| ||}|dkr||7 }n|dk r||8 }| j|}| |\}}}|t|kr9||7 }n|t|k rD||8 }|t| | }	t||	 }
t||	 }t||	 }nd}d}
d}d}t|dt|dt|
dt|dd}|S )Nr   r   r}   r*  )negneuposcompound)	r   sumr)  r   r   r3  r   fabsround)r   r  r   r%  r(  r7  r/  r0  r1  totalr6  r4  r5  sentiment_dictr   r   r   r  T  s6   

z(SentimentIntensityAnalyzer.score_valenceN)r   )r   r   r   r   r   r   r  r  r  r  r
  r	  r)  r#  r$  r3  r  r   r   r   r   r   K  s     

"21
r   )r   r   r   r   	itertoolsr   	nltk.datar   	nltk.utilr   r   r   r   r   r   r   r   <module>   s    gD