o
    i&                     @   s   d dl Z d dlmZ d dlmZ d dlmZ zd dlZW n	 ey%   Y nw d dl	m
Z
 G dd de
Zdd	 Zd
d ZG dd dZG dd dZdS )    N)abstractmethod)sqrt)stdout)ClusterIc                   @   sd   e Zd ZdZdddZdddZedd	 Zd
d Zedd Z	dd Z
dd Zdd Zdd ZdS )VectorSpaceClustererz
    Abstract clusterer which takes tokens and maps them into a vector space.
    Optionally performs singular value decomposition to reduce the
    dimensionality.
    FNc                 C   s   d| _ || _|| _dS )a)  
        :param normalise:       should vectors be normalised to length 1
        :type normalise:        boolean
        :param svd_dimensions:  number of dimensions to use in reducing vector
                                dimensionsionality with SVD
        :type svd_dimensions:   int
        N)_Tt_should_normalise_svd_dimensions)self	normalisesvd_dimensions r   L/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/nltk/cluster/util.py__init__   s   
zVectorSpaceClusterer.__init__c           
         s   t |dksJ  jrtt j|} jrb jt |d k rbtjt	t
|\}}}|d  j t jtj }|d d d  jf }|d  jd d f }	t	t||	}t	| _ || |rs fdd|D S d S )Nr   c                    s   g | ]}  |qS r   )classify).0vectorr
   r   r   
<listcomp>?       z0VectorSpaceClusterer.cluster.<locals>.<listcomp>)lenr   listmap
_normaliser	   numpylinalgsvd	transposearrayidentityfloat64dotr   cluster_vectorspace)
r
   vectorsassign_clusterstraceudvtSTDtr   r   r   cluster(   s    zVectorSpaceClusterer.clusterc                 C      dS )zD
        Finds the clusters using the given set of vectors.
        Nr   )r
   r#   r%   r   r   r   r"   A       z(VectorSpaceClusterer.cluster_vectorspacec                 C   s<   | j r| |}| jd urt| j|}| |}| |S N)r   r   r   r   r!   classify_vectorspacecluster_name)r
   r   r,   r   r   r   r   G   s   



zVectorSpaceClusterer.classifyc                 C   r-   )zN
        Returns the index of the appropriate cluster for the vector.
        Nr   r
   r   r   r   r   r0   O   r.   z)VectorSpaceClusterer.classify_vectorspacec                 C   s4   | j r| |}| jd urt| j|}| ||S r/   )r   r   r   r   r!   likelihood_vectorspace)r
   r   labelr   r   r   
likelihoodU   s
   

zVectorSpaceClusterer.likelihoodc                 C   s   |  |}||krdS dS )zP
        Returns the likelihood of the vector belonging to the cluster.
        g      ?g        )r0   )r
   r   r,   	predictedr   r   r   r3   \   s   
z+VectorSpaceClusterer.likelihood_vectorspacec                 C   s,   | j r| |}| jdurt| j|}|S )zU
        Returns the vector after normalisation and dimensionality reduction
        N)r   r   r   r   r!   r2   r   r   r   r   c   s
   

zVectorSpaceClusterer.vectorc                 C   s   |t t|| S )z7
        Normalises the vector to unit length.
        r   r   r!   r2   r   r   r   r   m   s   zVectorSpaceClusterer._normalise)FN)FF)__name__
__module____qualname____doc__r   r,   r   r"   r   r0   r5   r3   r   r   r   r   r   r   r      s    




r   c                 C   s   | | }t t||S )z}
    Returns the euclidean distance between vectors u and v. This is equivalent
    to the length of the vector (u - v).
    r7   )r&   vdiffr   r   r   euclidean_distancet   s   r>   c                 C   s0   dt | |tt | | tt ||   S )zw
    Returns 1 minus the cosine of the angle between vectors v and u. This is
    equal to ``1 - (u.v / |u||v|)``.
       )r   r!   r   )r&   r<   r   r   r   cosine_distance}   s   0r@   c                   @   s2   e Zd ZdZdd ZdddZdd Zd	d
 ZdS )_DendrogramNodezTree node of a dendrogram.c                 G   s   || _ || _d S r/   )_value	_children)r
   valuechildrenr   r   r   r      s   
z_DendrogramNode.__init__Tc                 C   s<   | j rg }| j D ]
}||| q|S |r| jgS | gS r/   )rC   extendleavesrB   )r
   valuesrG   childr   r   r   rG      s   
z_DendrogramNode.leavesc                 C   s   | j | fg}t||k r@| \}}|js|||f n#|jD ]}|jr.||j |f q |d|f q |  t||k sg }|D ]\}}||  qD|S Nr   )rB   r   poprC   pushappendsortrG   )r
   nqueueprioritynoderI   groupsr   r   r   rS      s    
z_DendrogramNode.groupsc                 C   s   t | j|jdk S rJ   )r@   rB   )r
   
comparatorr   r   r   __lt__   s   z_DendrogramNode.__lt__N)T)r8   r9   r:   r;   r   rG   rS   rU   r   r   r   r   rA      s    
rA   c                   @   s@   e Zd ZdZg fddZdd Zdd Zg fdd	Zd
d ZdS )
Dendrograma  
    Represents a dendrogram, a tree with a specified branching order.  This
    must be initialised with the leaf items, then iteratively call merge for
    each branch. This class constructs a tree representing the order of calls
    to the merge function.
    c                 C   s(   dd |D | _ t| j | _d| _dS )zs
        :param  items: the items at the leaves of the dendrogram
        :type   items: sequence of (any)
        c                 S   s   g | ]}t |qS r   )rA   r   itemr   r   r   r          z'Dendrogram.__init__.<locals>.<listcomp>r?   N)_itemscopy_original_items_merge)r
   itemsr   r   r   r      s   
zDendrogram.__init__c                    sj   t |dksJ t jg fdd|D R  }  jd7  _| j|d < |dd D ]} j|= q,dS )a=  
        Merges nodes at given indices in the dendrogram. The nodes will be
        combined which then replaces the first node specified. All other nodes
        involved in the merge will be removed.

        :param  indices: indices of the items to merge (at least two)
        :type   indices: seq of int
           c                 3   s    | ]} j | V  qd S r/   )rZ   )r   ir   r   r   	<genexpr>       z#Dendrogram.merge.<locals>.<genexpr>r?   r   N)r   rA   r]   rZ   )r
   indicesrR   r`   r   r   r   merge   s   	 
zDendrogram.mergec                 C   s8   t | jdkrt| jg| jR  }n| jd }||S )z
        Finds the n-groups of items (leaves) reachable from a cut at depth n.
        :param  n: number of groups
        :type   n: int
        r?   r   )r   rZ   rA   r]   rS   )r
   rO   rootr   r   r   rS      s   

zDendrogram.groupsc                    s  d\}}}t | jdkrt| jg| jR  }n| jd }| j}|r$|}ndd |D }ttt |d d t d dfdd		 d
d }|j|fg}	 fdd|D }
|	r|		 \}}t
tdd |j}t
t|j|}|rt|}t|}tt |D ]L}|| |v r||kr| |d| n||kr| ||d n| |||  ||
|< q||  kr|krn n	| ||| q||
|  q|d |jD ]}|jr|	|j|f q|	  |
D ]}|| q|d |	s]|dfdd|D  |d dS )z
        Print the dendrogram in ASCII art to standard out.

        :param leaf_labels: an optional list of strings to use for labeling the
                            leaves
        :type leaf_labels: list
        )+-|r?   r   c                 S   s   g | ]}d |j  qS )z%s)rB   r   leafr   r   r   r      r   z#Dendrogram.show.<locals>.<listcomp>r_    c                    s    |  |  |  S r/   r   )centreleftright)lhalfrhalfr   r   format   s   zDendrogram.show.<locals>.formatc                 S   s   t |  d S r/   )r   write)strr   r   r   display   s   z Dendrogram.show.<locals>.displayc                    s   g | ]} d qS )rk   r   ri   )rq   r   r   r     rY   c                 S   s   |  dd S )NFr   )rG   )cr   r   r   <lambda>  s    z!Dendrogram.show.<locals>.<lambda>
 c                 3   s    | ]}|  V  qd S r/   )centerrW   )widthr   r   ra   #  rb   z"Dendrogram.show.<locals>.<genexpr>N)rk   rk   )r   rZ   rA   r]   r\   maxr   intrB   rK   r   rC   indexminrangerM   rN   join)r
   leaf_labelsJOINHLINKVLINKre   rG   last_rowrt   rP   	verticalsrQ   rR   child_left_leafrc   min_idxmax_idxr`   rI   verticalr   )rq   ro   rp   rz   r   show   sZ   




zDendrogram.showc                 C   sD   t | jdkrt| jg| jR  }n| jd }|d}dt | S )Nr?   r   Fz<Dendrogram with %d leaves>)r   rZ   rA   r]   rG   )r
   re   rG   r   r   r   __repr__&  s
   

zDendrogram.__repr__N)	r8   r9   r:   r;   r   rd   rS   r   r   r   r   r   r   rV      s    	IrV   )r[   abcr   mathr   sysr   r   ImportErrornltk.cluster.apir   r   r>   r@   rA   rV   r   r   r   r   <module>   s   _	+