o
    ieb                     @   sN  d dl Zd dlZd dlmZmZmZmZmZ d dl	m
Z ejdd Zdd Zdd Zd	d
 Zdd Zejdg dejdddgejdg dejdddgejdddgdd Zejdddgejdddg dg dg dfddg dg dg dfddg dg d g dfgd!d" Zejd#d$ Zejd%dddg d&g d'fdddg d(g d)fdddg d*g d+fdddg d,g d+fgd-d. Zejd/d0 Zejd1ddg d2g d3fddg d4g d5fddg d6g d7fddg d8g d9fgd:d; Zejd<d= Zejd>dd?d?gejg d@g dAdBfdg dCeed?geg dDedEdFejggg dGg dHg dIgg dAdJfgejdddgdKdL ZejdddgejdMddgejdNdejg dOejdPfdeg dQfgdRdS Z dTdU Z!ejdddgejdNdejg dOejdPfdeg dQfgdVdW Z"ejdddgejdNdejg dXejdPfdeg dYfgdZd[ Z#ejdddgejd\dg d]fdg d^fgejdNdejg d_ejdPfdeg d`fgdadb Z$ejdddgejdMddgejdNdejg dOejdPfdeg dQfgdcdd Z%ejdeddfg dgfddhg d9fgdidj Z&ejdkdle'dmg dnfdoe'dpdqg g drfgejdddgdsdt Z(ejdudvdwgdxdy Z)dzd{ Z*d|d} Z+d~d Z,dd Z-dd Z.dS )    N)CategoricalIndex	DataFrameIndex
MultiIndexSeriesc                   C   s   t g dg dg ddS )N)maler   femaler   r   r   )lowmediumhighr	   r   r	   )USFRr   r   r   r   )gender	educationcountryr    r   r   g/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/pandas/tests/groupby/test_frame_value_counts.pyeducation_df   s   r   c                 C   sJ   | j ddd}tjtdd |  W d    d S 1 sw   Y  d S )Nr      axisr   match)groupbypytestraisesNotImplementedErrorvalue_countsr   gpr   r   r   	test_axis   s   
"r!   c                 C   sL   |  d}tjtdd |jdgd W d    d S 1 sw   Y  d S )Nr   subsetr   r"   )r   r   r   
ValueErrorr   r   r   r   r   test_bad_subset   s   
"r%   c                 C   sL   |  dddg jdd}tg dtjg dg dd	d
}t|| d S )Nr   r   r   T	normalize)      ?      ?r)   r(   r(   )r   r   r	   r   r   r   r   r   r
   r   r   r   r   r   r	   r   r   r   namesdataindex)r   r   r   r   from_tuplestmassert_series_equal)r   resultexpectedr   r   r   
test_basic%   s   r:   c                 C   s   | | j |||dS )Nr'   sort	ascending)r   )dfkeysr'   r<   r=   r   r   r   _frame_value_counts:   s   r@   r   columnarrayfunctionr'   TFzsort, ascending))FN)TT)TFas_indexframec                    s  d d j  fddd| } j||d}|ddg j|||d}	|r|tddg|||}
|r9t|	|
 d S |r=d	nd
}|
 jd|idd}
|dkrc|
jddidd}
t	
|
d dd|
d< n|dkrp|
d dk|
d< nt	
|
d dd|
d< t|	|
 d S  d d  d   d< |d j|||d}
d |
_|r|
jjdd}|d jdjd|d< |d jdjd|d< |d= |jdd idd}t||
_t|	|
 d S |
dd|
d jdjd |
dd|
d jdjd |
d= t|	|
 d S )Nr   c                    s    d |  dkS )Nr   r   r   )xr   r   r   <lambda>X       z6test_against_frame_and_seriesgroupby.<locals>.<lambda>rA   )byrE   r   r   r;   
proportioncountr   r   r   rB   level_0r   r   rD   -bothFr4      )valuesr   r   applyr@   r6   r7   reset_indexrenamenpwhereassert_frame_equalnamer4   to_framestrsplitgetr   
from_frameinsert)r   r   r'   r<   r=   rE   rF   rK   r    r8   r9   rZ   index_framer   rH   r   $test_against_frame_and_seriesgroupby>   sT   
""rb   zCsort, ascending, expected_rows, expected_count, expected_group_size)r   r   rR         )r   r   r   rR   r   )r   rc   r   rc   r   )rd   rc   r   rR   r   )r   rR   r   r   r   )r   rc   rc   r   r   )rd   r   rc   rR   r   )r   r   rR   r   r   c           
         s   j ddgddd}|d j|||d}t }	dD ]  fdd	|D |	 < q|r7||	d
< |	d
  |  < n||	d< t||	 d S )Nr   r   F)rE   r<   r   r;   r/   c                       g | ]}  | qS r   r   .0rowrB   r   r   r   
<listcomp>       z!test_compound.<locals>.<listcomp>rL   rM   )r   r   r   r6   rY   )
r   r'   r<   r=   expected_rowsexpected_countexpected_group_sizer    r8   r9   r   ri   r   test_compound   s   ro   c                   C   s$   t g dg dg ddg ddS )Nr   r   r   r   )rR   rd   rd      )rR   r   r   r   keynum_legs	num_wings)falcondogcatantrQ   r   r   r   r   r   
animals_df   s   rz   z9sort, ascending, normalize, expected_data, expected_indexr   rR   r   )r   r   r   )rR   rd   rq   rR   r   r   r   r   rR   )r|   )rR   rq   rd   r}   )rR   r   r   )r|   )rd   rR   rq   )r   rR   r   )r(   r)   r)   c           	      C   s\   | j |||d}t|tj|g ddd}t|| | dj |||d}t|| d S )N)r<   r=   r'   rr   r0   r2   rs   )r   r   r   from_arraysr6   r7   r   )	rz   r<   r=   r'   expected_dataexpected_indexresult_framer9   result_frame_groupbyr   r   r   test_data_frame_value_counts   s   
r   c                  C   s`   t j} tdd| d| ddddg	ddd| | ddddg	dddddd| d| g	ddddddd| | g	d	S )
Nr   rd   rq   rc   rR            )ABCD)rW   nanr   )nr   r   r   nulls_df   s   r   z:group_dropna, count_dropna, expected_rows, expected_values)	r   r   rc   r   r   rq   r   rR   rd   )	r(   r(         ?r)   r)   r)   r)   r   r   )r   r   rc   r   rR   rd   )r(   r(   r   r   r   r   )r   r   r   r   rq   r   )r(   r(   r)   r)   r)   r)   )r   r   r   )r(   r(   r   c           
         sr   j ddg|d}|jdd|d}t }jD ]  fdd|D | < qt|}t||d}	t||	 d S )	Nr   r   )dropnaT)r'   r<   r   c                    re   r   r   rf   rB   r   r   r   rj      rk   z,test_dropna_combinations.<locals>.<listcomp>r2   )	r   r   r   columnsr   r_   r   r6   r7   )
r   group_dropnacount_dropnarl   expected_valuesr    r8   r   r4   r9   r   r   r   test_dropna_combinations   s   

r   c                 C   s    t g dg dd| | dgdS )Nrp   )JohnAnner   BethSmithLouisers   
first_namemiddle_namer   )nulls_fixturer   r   r   names_with_nulls_df   s   
r   z%dropna, expected_data, expected_indexr   )r   r   )r   r   )r   r   r   r0   rp   )r   r   r   r   r   )r   r   r   r   )r   r   rR   rR   )rR   r   r   rR   )levelscodesr1   c                 C   s^   | j ||d}t||d}|r|tt| }t|| | dj ||d}t|| d S )N)r   r'   r2   rs   )r   r   floatlenr6   r7   r   )r   r   r'   r   r   r   r9   r   r   r   r   #test_data_frame_value_counts_dropna  s   !
r   observedznormalize, expected_data)rR   r   r   r   r   r   r   r   r   r   r   r   )dtype)r(   r)   r)           r   r   r(   r(   r   r   r   r   c                 C   s   |  djd||d}|j|d}tjg dg dd}t||d}td	D ]}	|jjt	|jj
|	 |	d
|_q&|rBt|| d S |j|rHdndd}
t||
 d S )Ncategoryr   rE   r   r&   r*   r+   r,   r   r   r	   r   r   r
   r   r   r   r-   r.   r   r   r	   r   r   r
   r   r   r   r   r   r
   r/   r0   r2   rc   levelrL   rM   rZ   )astyper   r   r   r5   r   ranger4   
set_levelsr   r   r6   r7   rU   rY   r   rE   r   r'   r   r    r8   r   expected_seriesir9   r   r   r   =test_categorical_single_grouper_with_only_observed_categories7  s,   


r   c                 C   s   |   d} | d jdg| d< | jd||d}|j|d}t|tj|g ddd}t	d	D ] }	t
|jj|	 }
|	d
krH|
| d jj}
|jj|
|	d|_q1|r\t|| d S |j|rbdndd}t|| d S )Nr   r   ASIAr   r&   r/   r0   r2   rc   r   r   rL   rM   r   )copyr   rx   add_categoriesr   r   r   r   r5   r   r   r4   r   set_categories
categoriesr   r6   r7   rU   rY   )r   rE   r   r   r'   r   r    r8   r   r   index_levelr9   r   r   r   !assert_categorical_single_groupers  s0   

r   c                 C       g d}t | |d|||d d S )Nr   Tr   rE   r   r   r'   r   r   r   rE   r'   r   r   r   r   r   -test_categorical_single_grouper_observed_true  s   
r   )rR   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r(   r)   r)   r   r   r   r(   r(   r   r   r   r   r   r   r   r   r   r   c                 C   r   )N)r*   r+   r,   r   r   r   r-   r.   r   r   r   r   )r   r   r	   )r   r   r   )r   r   r
   )r   r   r	   )r   r   r   )r   r   r
   Fr   r   r   r   r   r   .test_categorical_single_grouper_observed_false  s   *
r   zobserved, expected_index)r   r   r   )r   r   r   r   r	   r   )r   r	   r   r   r
   r   )r   r
   r   r   r   r   )r   r   r   r   r	   r   )r   r	   r   )r   r
   r   )r   r
   r   )r   r   r   r   r   )r   r   rR   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   c                 C   s   |   } | d d| d< | d d| d< | jddg||d}|j|d}t|r/||dk n|tj|g ddd	}td
D ]}	|jj	t
|jj|	 |	d|_q?|r[t|| d S |j|radndd}
t||
 d S )Nr   r   r   r   r&   r   )r   r   r   r0   r2   rR   r   rL   rM   r   r   r   r   r   r   r   r5   r   r4   r   r   r   r6   r7   rU   rY   )r   rE   r   r   r'   r   r    r8   r   r   r9   r   r   r   "test_categorical_multiple_groupers	  s0   2


r   c                 C   s   |   } | d d| d< | d d| d< | jd||d}|j|d}g d}t|tj|g dd	d
}tddD ]}	|jj	t
|jj|	 |	d|_q:|rVt|| d S |j|r\dndd}
t||
 d S )Nr   r   r   r   r   r&   r   r/   r0   r2   r   rc   r   rL   rM   r   r   r   r   r   r   test_categorical_non_groupersY  s.   

r   z*normalize, expected_label, expected_valuesrM   r|   rL   c                 C   s|   t g dg dd}|jg dddd gdd	}|jd
| d}t dg ddg ddg ddg d||i}t|| d S )Nr{   r   rR   rc   )r   r   )rd   r   rd   r   c                 S   s   | dkrdS dS )Nr   r   r   r   )r   r   r   r   rI     rJ   z&test_mixed_groupings.<locals>.<lambda>FrE   T)r<   r'   rN   )rd   rd   r   r~   level_2)r   r   r   r   )r   rc   rR   )r   r   r   r6   rY   )r'   expected_labelr   r>   r    r8   r9   r   r   r   test_mixed_groupings  s   		r   ztest, columns, expected_namesrepeatabbde)aNdbr   er   abcdlevel_1)r   Nr   r   cr   c           	      C   s   t g dg dg|d}ddg}|jdddgd	g|d
 }|r3tdtj||dd}t|| d S dd |D }t|}d|d< |	d t ||d}t
|| d S )N)r   rc   r   r   	   )rR   rd   rq   r   
   r   )r   r   r   rc   r   r   )rR   r   r   rd   rq   r   r   r   r   r   r   r   r0   r2   c                 S   s   g | ]	}t |d g qS )r   )listrf   r   r   r   rj     s    z0test_column_label_duplicates.<locals>.<listcomp>r   rM   )r   r   r   r   r   r5   r6   r7   r   appendrY   )	testr   expected_namesrE   r>   r   r8   r9   expected_columnsr   r   r   test_column_label_duplicates  s$   
r   znormalize, expected_label)FrM   )TrL   c                 C   sn   t g dgdd|gdjddd}d| d}tjt|d	 |j| d
 W d    d S 1 s0w   Y  d S )Nr   r   r   r   Fr   zColumn label 'z' is duplicate of result columnr   r&   )r   r   r   r   r$   r   )r'   r   gbmsgr   r   r   test_result_label_duplicates  s   	"r   c                  C   sX   t dddgi} | ddg}| }tdgtjddggd dgdd}t|| d S )Nr   r   rR   r0   rQ   )r   r   r   r   r   r5   r6   r7   )r>   r   r8   r9   r   r   r   test_ambiguous_grouping  s
   "r   c                  C   sj   t g dg ddg dd} d}tjt|d | djdgd	 W d    d S 1 s.w   Y  d S )
Nr   r   r   rG   yr   c1c2r   r   r   rQ   z;Keys {'c1'} in subset cannot be in the groupby column keys.r   r   r#   r   r   r   r$   r   r   r>   r   r   r   r   "test_subset_overlaps_gb_key_raises  
   "r   c                  C   sj   t g dg ddg dd} d}tjt|d | djd	gd
 W d    d S 1 s.w   Y  d S )Nr   r   r   r   rQ   z4Keys {'c3'} in subset do not exist in the DataFrame.r   r   c3r#   r   r   r   r   r   !test_subset_doesnt_exist_in_frame  r   r   c                  C   sn   t g dg ddg dd} | jddjdgd	}td
dgtjdd
gddggd dgdd}t|| d S )Nr   r   r   r   rQ   r   r   r   r#   r   rR   rG   r   r0   r   r   r   r   r   r   r6   r7   r>   r8   r9   r   r   r   test_subset  s   "r   c                  C   s~   t g dg dg dgg dg dd} | jddjdgd	}td
dgtjdd
gddgddggg ddd}t|| d S )N)r   rG   rG   )r   r   r   r   )r   r   r   )r4   r   r   r   r   r#   r   rR   rG   r   )Nr   r   r0   rQ   r   r   r   r   r   test_subset_duplicate_columns  s   r   )/numpyrW   r   pandasr   r   r   r   r   pandas._testing_testingr6   fixturer   r!   r%   r:   r@   markparametrizerb   ro   rz   r   r   r   r   r   r   r   rC   int64r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s`   

>

	



	

0$

$$ $/


	
