o
    i^                     @   sv   d dl Z d dlZd dlZd dlmZ d dlZd dlmZm	Z	m
Z
mZmZ d dlmZ d dlmZmZ G dd dZdS )    N)is_integer_dtype)CategoricalCategoricalIndex	DataFrameSeriesget_dummies)SparseArraySparseDtypec                   @   s0  e Zd Zejdd Zejddejedgddd Z	ejd	d
gddd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zejd3d4e d5d6giie d7d8giej!d9fd4e d:d5giie d;d8giej!d9fe d:d6gid5d<e d7d8giej!d9fe d:d6gid5d=e d>d8giej!d9fgd?d@ Z"dAdB Z#dCdD Z$dEdF Z%dGdH Z&dIdJ Z'dKdL Z(dMdN Z)dOdP Z*ejdQdRdSgdTdU Z+ejd
dRdSgdVdW Z,dXdY Z-dZd[ Z.ejd\d]gd^d_ Z/dS )`TestGetDummiesc                 C   s   t g dg dg ddS )Nabr   r   r   c         )ABC)r   )self r   `/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/pandas/tests/reshape/test_get_dummies.pydf   s   zTestGetDummies.dfuint8i8N)paramsc                 C   s   t |jS N)npdtypeparamr   requestr   r   r   r       s   zTestGetDummies.dtypedensesparsec                 C   s
   |j dkS )Nr%   )r!   r"   r   r   r   r%       s   
zTestGetDummies.sparsec                 C   s   |d u rt jS |S r   )r   r   )r   r    r   r   r   effective_dtype&   s   zTestGetDummies.effective_dtypec                 C   sD   d}t jt|d t|dd W d    d S 1 sw   Y  d S )Nz1dtype=object is not a valid dtype for get_dummiesmatchobjectr    )pytestraises
ValueErrorr   )r   r   msgr   r   r   'test_get_dummies_raises_on_dtype_object+   s   "z6TestGetDummies.test_get_dummies_raises_on_dtype_objectc                 C   s   t d}t|}t|t d}tg dg dg dd| |d}|r+|jtdd	}t|||d
}t|| t|||d
}t|| t d|_	t|||d
}t|| d S )NabcABCr   r   r   r   r   r   r   r   r   r   r   r   r*           
fill_valuer%   r    )
listr   r   r&   applyr   r   tmassert_frame_equalindex)r   r%   r    s_lists_seriess_series_indexexpectedresultr   r   r   test_get_dummies_basic0   s    
z%TestGetDummies.test_get_dummies_basicc                 C   s  t d}t|}tg dg dg dd}tg dg dg dd| |t dd	}|rEt|r5d
}n	|tkr<d}nd}|jt|d}t|||d}t	
|| t|||d}t	
|| t||j||d}|rxd| |j d| d}	n| |j}	t|	di}|j }dd |jD |_t	|| t|dg||d}ddd}
d|
|	d
 |
|	< t|
 }|j }dd |jD |_| }t	|| d S )Nr0   )r   r   r   r   r   )r   r   r   r   r   )r   r   r   r   r   r5   r2   r3   r4   )r    columnsr   Fr6   r7   r9   )rE   r%   r    zSparse[z, ]   c                 S      g | ]}t |qS r   str.0ir   r   r   
<listcomp>i       z?TestGetDummies.test_get_dummies_basic_types.<locals>.<listcomp>r   r   )int64r)   r   c                 S   rH   r   rI   rK   r   r   r   rN   s   rO   )r:   r   r   r&   r   boolr;   r   r   r<   r=   rE   namedtypesvalue_countsr>   assert_series_equalget
sort_index)r   r%   r    r?   r@   s_dfrB   r8   rC   
dtype_nameexpected_countsr   r   r   test_get_dummies_basic_typesE   sJ   


z+TestGetDummies.test_get_dummies_basic_typesc                 C   s   t jg}t|}t|dgd}t||d}t||d}t||d}|js&J |js+J |js0J |j dgks:J |j dgksDJ |j dgksNJ d S )Nr   r>   r%   r   )r   nanr   r   emptyr>   tolist)r   r%   just_na_listjust_na_seriesjust_na_series_indexres_list
res_seriesres_series_indexr   r   r   test_get_dummies_just_naw   s   


z'TestGetDummies.test_get_dummies_just_nac           
      C   s  ddt jg}t|||d}tg dg dd| |d}|r&|jtdd	}t|| t|d
||d}tt jg ddg ddg di| |d}|j	ddt jgdd}|j
|_
|rb|jtdd	}t|| tt jgd
||d}ttddgdt jg| |d}	t|j|	j d S )Nr   r   r9   r2   r3   )r   r   r*   r6   r7   Tdummy_nar%   r    r4   r   axisr   r\   rE   r    )r   r^   r   r   r&   r;   r   r<   r=   reindexrE   r   assert_numpy_array_equalvalues)
r   r%   r    sresexpres_naexp_nares_just_naexp_just_nar   r   r   test_get_dummies_include_na   s.   z*TestGetDummies.test_get_dummies_include_nac                 C   st   dd l }d}|d}|||g}t|d|d}tdg dd| g d	itjd
}|r2|jtdd}t	|| d S )Nr   ezLATIN SMALL LETTER E WITH ACUTEletterprefixr%   letter_er2   letter_r   r   r   r*   r7   )
unicodedatalookupr   r   r   r   r;   r   r<   r=   )r   r%   r   rx   eacuterp   rq   rr   r   r   r   test_get_dummies_unicode   s   

z'TestGetDummies.test_get_dummies_unicodec                 C   s   |ddg }t ||d}tg dg dg dg ddtjd	}|rCttg dd
d	tg dd
d	tg dd
d	tg dd
d	d}t|| d S )Nr   r   r]   r   r   r   r3   r   r   r   r4   A_aA_bB_bB_cr*   r   )r   r   r   r   r   r<   r=   r   r   r%   rC   rB   r   r   r   test_dataframe_dummies_all_obj   s   	z-TestGetDummies.test_dataframe_dummies_all_objc                 C   s\   |ddg }| ddd}t|}tg dg dg dg d	d
tjd}t|| d S )Nr   r   r)   stringr   r   r   r3   r   r4   r   r*   )astyper   r   r   r   r<   r=   r   r   rC   rB   r   r   r   #test_dataframe_dummies_string_dtype   s   	z2TestGetDummies.test_dataframe_dummies_string_dtypec              	   C   s   t |||d}|rt}t|d}ntj}|}tg d|g d|d|g d|d|g d|d|g d|dd	}|g d	 }t|| d S )
Nr9   r   r   r   r*   r3   r   r4   r   r   r   r   r   )r   r   r	   r   arrayr   r<   r=   r   r   r%   r    rC   arrtyprB   r   r   r   "test_dataframe_dummies_mix_default   s    	z1TestGetDummies.test_dataframe_dummies_mix_defaultc                    s   ddg}t |||d}tg dg dg dg dg dd	tjd
}|dg |dg< g d}|dg|  }|r9tnt ||  fdd||< t|| d S )Nfrom_Afrom_Brz   r   r   r3   r   r4   r   from_A_afrom_A_bfrom_B_bfrom_B_cr*   r   r   r   r   r   c                    s    | S r   r   )xr   r   r   <lambda>   s    zCTestGetDummies.test_dataframe_dummies_prefix_list.<locals>.<lambda>)	r   r   r   r   r   r   r;   r<   r=   )r   r   r%   prefixesrC   rB   colsr   r   r   "test_dataframe_dummies_prefix_list   s"   
z1TestGetDummies.test_dataframe_dummies_prefix_listc              
   C   s   t |d|d}g d}tg dg dg dgdg| tjd}|dtji}|rWtjtg d	dd
tg ddddtg ddddtg ddddtg ddddgdd}t	
|| d S )Nbadrz   )bad_abad_br   bad_cr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rl   r   )rR   r   r   zSparse[uint8])rR   r    r3   r   r   r4   r   r   rj   )r   r   r   r   r   rP   pdconcatr   r<   r=   )r   r   r%   rC   bad_columnsrB   r   r   r   !test_dataframe_dummies_prefix_str   s&   z0TestGetDummies.test_dataframe_dummies_prefix_strc                 C   s   t |dgdg|d}tg dg dg dg dd}|j}||d	d   tj||d	d  < |d
g |d
g< |rJddg}|| tdd||< t|| d S )Nr   r   )r{   rE   r%   r   r   r   r3   )r   r   r   r   r   r   r   r   r   r   )	r   r   rE   r   r   r   r	   r<   r=   r   r   r%   rC   rB   r   r   r   r   test_dataframe_dummies_subset  s   $z,TestGetDummies.test_dataframe_dummies_subsetc                 C   s   t |d|d}tg dg dg dg dg ddtjd	}|d
g |d
g< |g d }|r>g d}|| tdd||< t|| t |ddg|d}|jdddd}t|| t |ddd|d}t|| d S )Nz..
prefix_sepr%   r   r   r3   r   r4   )r   A..aA..bB..bB..cr*   r   )r   r   r   r   r   r   __B__bB__c)r   r   rE   r   )	r   r   r   r   r   r	   r<   r=   renamer   r   r   r   !test_dataframe_dummies_prefix_sep,  s*   
z0TestGetDummies.test_dataframe_dummies_prefix_sepc                 C   N   t d}tjt|d t|dg|d W d    d S 1 s w   Y  d S )NzPLength of 'prefix' (1) did not match the length of the columns being encoded (2)r'   ztoo fewrz   reescaper+   r,   r-   r   r   r   r%   r.   r   r   r   (test_dataframe_dummies_prefix_bad_lengthG     "z7TestGetDummies.test_dataframe_dummies_prefix_bad_lengthc                 C   r   )NzTLength of 'prefix_sep' (1) did not match the length of the columns being encoded (2)r'   r   r   r   r   r   r   r   ,test_dataframe_dummies_prefix_sep_bad_lengthO  r   z;TestGetDummies.test_dataframe_dummies_prefix_sep_bad_lengthc                 C   s   ddd}t g dg dg dd}t|||d}t g dg d	g d
g dg dd}g d}|| tj||< |rJ|| tdd||< t|| d S )Nr   r   r   r   r   r   )r   r   r   rz   r   r3   r   r4   r   r   r   r   )r   r   r   r   r   r	   r<   r=   )r   r%   r   r   rC   rB   rE   r   r   r   "test_dataframe_dummies_prefix_dictW  s    

z1TestGetDummies.test_dataframe_dummies_prefix_dictc                 C   s   t jt jt jg|jdd d f< t|d||djdd}|r%t}t|d}nt j}|}tdddt jg|g d|d	|g d
|d	|g d|d	|g d|d	|g d|d	|g d|d	djdd}t	
|| t|d||d}|g d }t	
|| d S )Nr   Trh   r   rj   r   r   )r   r   r   r   r*   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   A_nanr   r   B_nanFr   )r   r^   locr   rW   r   r	   r   r   r<   r=   r   r   r   r   test_dataframe_dummies_with_nam  s2   
z-TestGetDummies.test_dataframe_dummies_with_nac                 C   s   t g d|d< t|||djdd}|rt}t|d}ntj}|}tg d|g d|d	|g d
|d	|g d|d	|g d|d	|g d|d	|g d|d	djdd}t	|| d S )Nr   yr   catr9   r   rj   r   r   r   r*   r3   r   r4   r2   r~   )r   r   r   r   r   cat_xcat_y)
r   r   rW   r   r	   r   r   r   r<   r=   r   r   r   r   'test_dataframe_dummies_with_categorical  s(   
z6TestGetDummies.test_dataframe_dummies_with_categoricalzget_dummies_kwargs,expecteddata   är   u   ä_ar   r*   r   u   x_ä)r   r{   )r   r   u   xäac                 C   s   t di |}t|| d S )Nr   )r   r<   r=   )r   get_dummies_kwargsrB   rC   r   r   r   test_dataframe_dummies_unicode  s   z-TestGetDummies.test_dataframe_dummies_unicodec                 C   s   t d}t|}t|t d}tg dg ddtjd}t|d|d}|r-|jtd	d
}t	|| t|d|d}t	|| t d|_
t|d|d}t	|| d S )Nr0   r1   r3   r4   )r   r   r*   T
drop_firstr%   r   r7   )r:   r   r   r   r   r   r;   r   r<   r=   r>   r   r%   r?   r@   rA   rB   rC   r   r   r   !test_get_dummies_basic_drop_first  s   
z0TestGetDummies.test_get_dummies_basic_drop_firstc                 C   s   t d}t|}t|t d}ttdd}t|d|d}t|| t|d|d}t|| tt dd}t|d|d}t|| d S )Naaar1   r   r\   Tr   )r:   r   r   r   aranger   r<   r=   r   r   r   r   +test_get_dummies_basic_drop_first_one_level  s   z:TestGetDummies.test_get_dummies_basic_drop_first_one_levelc           	      C   s   ddt jg}t|d|d}tdg dit jd}|r!|jtdd}t|| t|dd|d	}tdg dt jg d
it jdj	dt jgdd}|rP|jtdd}t|| tt jgdd|d	}tt 
dd}t|| d S )Nr   r   Tr   r3   r*   r   r7   ri   r   r%   r4   r   rj   r\   )r   r^   r   r   r   r;   r   r<   r=   rm   r   )	r   r%   s_NArq   rr   rs   rt   ru   rv   r   r   r   $test_get_dummies_basic_drop_first_NA  s$    
z3TestGetDummies.test_get_dummies_basic_drop_first_NAc                 C   sX   |ddg }t |d|d}tg dg ddtjd}|r$|jtd	d
}t|| d S )Nr   r   Tr   r3   r4   )r   r   r*   r   r7   )r   r   r   r   r;   r   r<   r=   r   r   r   r   !test_dataframe_dummies_drop_first  s   z0TestGetDummies.test_dataframe_dummies_drop_firstc                 C   s   t g d|d< t|d|d}tg dg dg dg dd	}g d
}|| tj||< |g d	 }|rC|D ]
}t|| ||< q8t|| d S )Nr   r   Tr   r   r3   r4   r~   )r   r   r   r   )r   r   r   )	r   r   r   r   r   r   r   r<   r=   )r   r   r%   r    rC   rB   r   colr   r   r   2test_dataframe_dummies_drop_first_with_categorical  s   zATestGetDummies.test_dataframe_dummies_drop_first_with_categoricalc                 C   s   t jt jt jg|jdd d f< t|dd|djdd}tdddt jgg dg dg d	g dd
}g d}|| t j||< |jdd}|rU|D ]
}t|| ||< qJt	
|| t|dd|d}|g d }t	
|| d S )Nr   Tr   r   rj   r   r   r   r   )r   r   r   r   r   )r   r   r   r   F)r   r   r   )r   r^   r   r   rW   r   r   r   r   r<   r=   )r   r   r%   rC   rB   r   r   r   r   r   )test_dataframe_dummies_drop_first_with_na  s0   	z8TestGetDummies.test_dataframe_dummies_drop_first_with_nac                 C   s   t g d}t|}tddgddgddggddgtjd}t|| t tg d}t|}tddgddgddggtddgtjd}t|| d S )	Nr   r   r   r   r   r   rl   r   r   r   )r   r   r   r   r   r<   r=   r   )r   r   rC   rB   r   r   r   test_get_dummies_int_int/  s   &"z'TestGetDummies.test_get_dummies_int_intc                 C   s   t g dtg dg dg dd}g d}t g dg dg dg|d}||d	d   |||d	d  < t|d
dg|d}t|| d S )Nr   r   )      ?       @r   )r   r   r   D)r   r   A_1A_2B_ar   )r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   rl   )r   r   r   r   r<   r=   )r   r    r   rE   rB   rC   r   r   r   test_get_dummies_int_df<  s   
"z&TestGetDummies.test_get_dummies_int_dforderedTFc                 C   sx   t tdtd|d}t||d}tjg dg dg| |d}t|j|j|d}t||| |d}t	
|| d S )Nxyxyz)
categoriesr   r*   r2   r3   rl   )r   r:   r   r   r   r&   r   r   r   r<   r=   )r   r    r   r   rC   r   r   rB   r   r   r   1test_dataframe_dummies_preserve_categorical_dtypeN  s    
z@TestGetDummies.test_dataframe_dummies_preserve_categorical_dtypec                 C   sL   t ddgddgd}t|dg|d}|jdgd	}t|dg | d S )
Nr   r   ABCD)GDPNationr   rE   r%   r   r   )r   	from_dictr   rm   r<   r=   )r   r%   r   df2r   r   r   *test_get_dummies_dont_sparsify_all_columns\  s   z9TestGetDummies.test_get_dummies_dont_sparsify_all_columnsc                 C   sh   g d|_ t|jdd}tg dg dg dgg dtjdjdd}|d	tji}t	|| d S )
N)r   r   r   r   rj   r   r   r   )r   r   r   r   A_crl   r   )
rE   r   rW   r   r   r   r   rP   r<   r=   r   r   r   r   "test_get_dummies_duplicate_columnse  s   
z1TestGetDummies.test_get_dummies_duplicate_columnsc                 C   s`   t dddgi}t|dgdd}tdd}t tddg|dtddg|dd	}t|| d S )
Nr   r   r   Tr   r   r   r*   )r   r   )r   r   r	   r   r<   r=   )r   r   rC   r    rB   r   r   r   test_get_dummies_all_sparset  s   
z*TestGetDummies.test_get_dummies_all_sparsero   bazc                 C   sf   t g dg dg dg dd}d}tjt|d t||d W d    d S 1 s,w   Y  d S )	N)r   r   r            )oner  r  twor  r  )r   r   r   r   r   r   )r   r   zqwt)barfoor   zooz1Input must be a list-like for parameter `columns`r'   r   )r   r+   r,   	TypeErrorr   )r   ro   r   r.   r   r   r   #test_get_dummies_with_string_values  s   	"z2TestGetDummies.test_get_dummies_with_string_values)0__name__
__module____qualname__r+   fixturer   r   float64rQ   r    r%   r&   r/   rD   r[   rg   rw   r   r   r   r   r   r   r   r   r   r   r   r   r   markparametrizer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r
      sv    


2


r
   )r   numpyr   r+   pandas.core.dtypes.commonr   pandasr   r   r   r   r   r   pandas._testing_testingr<   pandas.core.arrays.sparser   r	   r
   r   r   r   r   <module>   s    