o
    iN(                     @  s  d Z ddlmZ ddlZddlmZmZmZmZm	Z	 ddl
ZddlmZ ddlmZ ddlmZmZ ddlmZmZ dd	lmZmZmZmZmZ erXdd
lmZmZmZm Z m!Z! dZ"d0ddZ#dde"dfd1d d!Z$de"fd2d$d%Z%d3d(d)Z&de"dfd4d+d,Z'de"dfd5d.d/Z(dS )6z"
data hash pandas / numpy objects
    )annotationsN)TYPE_CHECKINGHashableIterableIteratorcast)lib)hash_object_array)	ArrayLikenpt)is_categorical_dtypeis_list_like)ABCDataFrameABCExtensionArrayABCIndexABCMultiIndex	ABCSeries)Categorical	DataFrameIndex
MultiIndexSeries0123456789123456arraysIterator[np.ndarray]	num_itemsintreturnnpt.NDArray[np.uint64]c                 C  s   zt | }W n ty   tjg tjd Y S w t|g| } td}t|td }t| D ]\}}|| }||N }||9 }|td| | 7 }q2|d |ksXJ d|td7 }|S )z
    Parameters
    ----------
    arrays : Iterator[np.ndarray]
    num_items : int

    Returns
    -------
    np.ndarray[uint64]

    Should be the same as CPython's tupleobject.c
    dtypeiCB ixV4 iXB    zFed in wrong num_itemsi| )	nextStopIterationnparrayuint64	itertoolschain
zeros_like	enumerate)r   r   firstmultoutia	inverse_i r1   S/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/pandas/core/util/hashing.pycombine_hash_arrays2   s    
r3   Tutf8objIndex | DataFrame | Seriesindexboolencodingstrhash_key
str | None
categorizer   c                   s|  ddl m} du rtttr|tdddS ttr8tj j	ddd}||ddd}|S tt
rotj j	ddd}|rd fd	d
dD }t|g|}	t|	d}||jddd}|S ttr fdd
 D }
tj}|r fdd
dD }|d7 }t|
|}dd
 |D }
t|
|}||jddd}|S tdt )a~  
    Return a data hash of the Index/Series/DataFrame.

    Parameters
    ----------
    obj : Index, Series, or DataFrame
    index : bool, default True
        Include the index in the hash (if Series/DataFrame).
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    Series of uint64, same length as the object
    r   )r   Nr&   F)r    copyr>   )r7   r    r>   c                 3  &    | ]}t jd  djV  qdS F)r7   r9   r;   r=   Nhash_pandas_objectr7   _values.0_r=   r9   r;   r5   r1   r2   	<genexpr>       
z%hash_pandas_object.<locals>.<genexpr>N   c                 3  s$    | ]\}}t |j V  qd S rK   )
hash_arrayrD   )rF   rG   series)r=   r9   r;   r1   r2   rI      s
    
c                 3  r@   rA   rB   rE   rH   r1   r2   rI      rJ   r!   c                 s  s    | ]}|V  qd S rK   r1   )rF   xr1   r1   r2   rI      s    zUnexpected type for hashing )pandasr   _default_hash_key
isinstancer   hash_tuplesr   rM   rD   astyper   r'   r(   r3   r7   r   itemslencolumns	TypeErrortype)r5   r7   r9   r;   r=   r   hser
index_iterr   hashesr   index_hash_generator_hashesr1   rH   r2   rC   T   sN   

2






rC   vals+MultiIndex | Iterable[tuple[Hashable, ...]]c                   sz   t | stdddlm m} t| ts|| n|  fddtj	D }fdd|D }t
|t|}|S )a  
    Hash an MultiIndex / listlike-of-tuples efficiently.

    Parameters
    ----------
    vals : MultiIndex or listlike-of-tuples
    encoding : str, default 'utf8'
    hash_key : str, default _default_hash_key

    Returns
    -------
    ndarray[np.uint64] of hashed values
    z'must be convertible to a list-of-tuplesr   )r   r   c                   s(   g | ]} j | j| d ddqS )FTorderedfastpath)codeslevels)rF   level)r   mir1   r2   
<listcomp>   s    zhash_tuples.<locals>.<listcomp>c                 3  s    | ]
}t | d V  qdS )r9   r;   N)_hash_categorical)rF   catrj   r1   r2   rI      s    
zhash_tuples.<locals>.<genexpr>)r   rX   rP   r   r   rR   r   from_tuplesrangenlevelsr3   rV   )r`   r9   r;   r   cat_valsr]   rZ   r1   )r   r9   r;   rh   r2   rS      s   
rS   rl   r   c                 C  sd   t | jj}t|||dd}|  }t|r|| j}n	t j	t|dd}|
 r0tj||< |S )a  
    Hash a Categorical by hashing its categories, and then mapping the codes
    to the hashes

    Parameters
    ----------
    cat : Categorical
    encoding : str
    hash_key : str

    Returns
    -------
    ndarray[np.uint64] of hashed values, same size as len(c)
    F)r=   r&   r   )r$   asarray
categoriesrD   rM   isnarV   takere   zerosanyr   u8max)rl   r9   r;   valueshashedmaskresultr1   r1   r2   rk      s   	
rk   r
   c                 C  s~   t | ds	td| j}t|rtd| } t| ||S t| tr'|  \} }nt| t	j
s8tdt| j dt| |||S )aK  
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    ndarray[np.uint64, ndim=1]
        Hashed values, same length as the vals.
    r    zmust pass a ndarray-liker   z6hash_array requires np.ndarray or ExtensionArray, not z!. Use hash_pandas_object instead.)hasattrrX   r    r   r   rk   rR   r   _values_for_factorizer$   ndarrayrY   __name___hash_ndarray)r`   r9   r;   r=   r    rG   r1   r1   r2   rM     s   


rM   
np.ndarrayc                 C  sp  | j }t|tjrtt| dtt|   S |tkr$| d} nrt	|j
tjtjfr9| djddd} n]t	|j
tjrS|jdkrS| d| j j d} nC|rxdd	lm}m}m} || dd
\}}	||||	ddd}
t|
||S zt| ||} W n ty   t| tt||} Y nw | | d? N } | td9 } | | d? N } | td9 } | | d? N } | S )z!
    See hash_array.__doc__.
       u8i8Fr?      ur   )r   r   	factorize)sortTrb      l   e9z    l   b&&&	    )r    r$   
issubdtype
complex128rM   realimagr8   rT   
issubclassrY   
datetime64timedelta64viewnumberitemsizerP   r   r   r   _with_inferrk   r	   rX   r:   objectr&   )r`   r9   r;   r=   r    r   r   r   re   rr   rl   r1   r1   r2   r   7  s:   	 r   )r   r   r   r   r   r   )r5   r6   r7   r8   r9   r:   r;   r<   r=   r8   r   r   )r`   ra   r9   r:   r;   r:   r   r   )rl   r   r9   r:   r;   r:   r   r   )
r`   r
   r9   r:   r;   r:   r=   r8   r   r   )
r`   r   r9   r:   r;   r:   r=   r8   r   r   ))__doc__
__future__r   r'   typingr   r   r   r   r   numpyr$   pandas._libsr   pandas._libs.hashingr	   pandas._typingr
   r   pandas.core.dtypes.commonr   r   pandas.core.dtypes.genericr   r   r   r   r   rP   r   r   r   r   r   rQ   r3   rC   rS   rk   rM   r   r1   r1   r1   r2   <module>   s>    

$^
.*3