o
    i*                     @   s~  d dl m Z  d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ ejfddZi dejdejd	d d
ejdejdejdejdejdejdejdejdejdejdejdd dejdejejd ejejd ejdZdd Zdd Zdd Zdd  Zejd!d"d#gd$d% Zejd&d'd( Zd)d* Zd+d, Zd-d. Z d/d0 Z!d1d2 Z"d3d4 Z#ejd!d"d#gejd5d"d#gejd6d"d#gd7d8 Z$d9d: Z%d;d< Z&d=d> Z'd?d@ Z(dAdB Z)ejdCdDgee*dEd dFgdGdHgdIfee*dE+dJdK dHgd dGgdIfee*dLd dFgdGdHgdIfgdMdN Z,dOdP Z-dQdR Z.dSdT Z/ejdUddVdgfddWdXgfddWdgfddVdXgfgdYdZ Z0d[d\ Z1d]d^ Z2d_d` Z3dadb Z4dcdd Z5dedf Z6dgdh Z7didj Z8ejdkd"ee
j9eg dldmdng dogdpdqgdrg dsdtdufd#eeg dldmdng dog dsdvfgdwdx Z:ejdydej;ej<gdzd{ Z=ej>d|d} Z?ejd~ddgdd Z@ejd~ddgejd5d#dgdd ZAejdd"e
9e	dgdE dgdE  dmdde	g ddmdde	ddgdE gg dfd#e
jBeddgd#deg dd#de	ddggg ddrdHdHdEdEej;ej;dGdGej;ej;dFdFgfde
jBeddgd#deg dd#de	ddggg ddrdHdHdEdEej;ej;dGdGej;ej;dFdFgfgdd ZCdd ZDejdg dg dgdd ZEejddd ZFdd ZGdd ZHdd ZIdd ZJejd5d#dgdd ZKdd ZLejdejMg dlfejNg dofgdd ZOdd ZPejdddgdd ZQdd ZRdd ZSdd ZTdd ZUdd ZVejdddgdeWd5eXfddZYejdddgdeWd5eXfddZZdd Z[ejdddgdd Z\ddÄ Z]ddń Z^ddǄ Z_dS )    )datetimeN)CategoricalCategoricalIndex	DataFrameIndex
MultiIndexSeriesqcut)get_groupby_method_argsc                 C   s.   dd }t jt|||d}| j||d S )zpReindex to a cartesian production for the groupers,
    preserving the nature (Categorical) of each grouper
    c                 S   s4   t | ttfr| j}tjtt||| jd} | S )N
categoriesordered)	
isinstancer   r   r   
from_codesnparangelenr   )ar    r   `/var/www/edux/Edux_v2/venv/lib/python3.10/site-packages/pandas/tests/groupby/test_categorical.pyf   s   z)cartesian_product_for_groupers.<locals>.fnames
fill_value)r   from_productmapreindex
sort_index)resultargsr   r   r   indexr   r   r   cartesian_product_for_groupers   s   r"   allanycountcorrwithfirstidxmaxidxminlastmadmaxmeanmedianminnthnuniqueprodquantile)semsizeskewstdsumvarc                 C   sB   t | jd}dd }| j|ddj|}|jjd dksJ d S )N   c                 S   s   |   |  |  |  dS )Nr/   r,   r%   r-   r;   )groupr   r   r   	get_statsI   s
   z2test_apply_use_categorical_name.<locals>.get_statsFobservedr   C)r	   r@   groupbyDapplyr!   r   )dfcatsr=   r   r   r   r   test_apply_use_categorical_nameF   s   rF   c                  C   s~  t g dg ddd} tg d| d}ttdddd	}td
dddtjgi|d}|jddd }t	|| t g dg ddd}t g dg ddd}t||g dd}|jddd}tg dddd	}	tdt
g d|	di}d}
tjt|
d | }W d    n1 sw   Y  t	|| tddgddgddggddgd }t |j|d< |jdgdd}|d!d" }t	||dg  |d}|jd#dg }t	|| d$d% }||}|jd#dg  }tddgdd&|_|d d'|d< t	|| td
g d(i}tj|jg d)d*}|jj|ddt}t||d
  t|jj|ddd+d" |d
  t	|j|ddt|d
g  |j|dd}tjtd,dd- |d.d" }W d    n	1 sow   Y  t	||d
g  td " |d/d" }|t}|tjj}|d0d" }W d    n	1 sw   Y  tj	||d
g dd1 tj	||d
g dd1 t	||d
g  t	||d
g  t|jj|ddtj|d
  t	|j|ddtj| td
g d2i}tj|jg d3d*}|jj|ddt}t||d
  t|jj|ddd4d" |d
  t	|j|ddt|d
g  t	|j|ddd5d" |d
g  td
g d6i}tj|jg d7t tdd8}|j|ddt }t|j!j"|j!j#d9}t
g d6|d}d
|j_$t|| g d:}tj%j&d#dd;d<}t j'||dd9} ttj%(d;d}|j| dd }|jt)| dd }t|| j"dd}	|*|	}t	|| |j| dd}|+ }| j,- }t)| .|}|.|}t |dg d:d=}|j|ddd>+ }t	|| t j't/d0d?|dd9}t|}t1|2 j3d#| tg d@d }t1|2 j3d| d S )AN	r   r   r   brH   rH   crI   rI   r   rH   rI   dTr   	   rM   rM      rN   rN      r:      r   rH   abcdrH   namer   r   rM   rN   r:   r!   Fr>   r   r   rH   rH   r   rH   zrI   rK   rI   rK   rI   rK   yrM   rN   rO   r:   ABvaluesr^   r`   )rO      r   !The default value of numeric_onlymatchzJohn P. Doez	Jane Dove	person_idperson_namecolumnsc                 S   s   | S Nr   xr   r   r   <lambda>x       ztest_basic.<locals>.<lambda>r   c                 S   s   |  djd S )Nrf   r   )drop_duplicatesilocrj   r   r   r   r      s   ztest_basic.<locals>.frT   object)rP         )r   
         (   )binsc                 S   
   t | S ri   r   r8   xsr   r   r   rl         
 z
scalar maxrd   check_stacklevelc                 S   ry   ri   r   r,   r{   r   r   r   rl      r}   c                 S   s   t j| ddS )Nr   axisr   r{   r   r   r   rl          c                 S   s   t j| S ri   )r   maximumreducer{   r   r   r   rl          )check_dtype)rP   rr   rs   )ir   rt   ru   rv   rw   c                 S   ry   ri   rz   r{   r   r   r   rl      r}   c                 S   ry   ri   rz   r{   r   r   r   rl      r}   )rM   r   r   r   r   rM   rN   rO   r:   )labelsr   foobarbazquxd   r5   )r   r   sortr?      r%   r-   r7   r/   z25%z50%z75%r,   )4r   r   r   listr   nanrA   r-   tmassert_frame_equalr   assert_produces_warningFutureWarningr8   rf   	transformrn   ro   rC   copyr   r!   astypepdcutr   assert_series_equalr,   r   r   filterr#   r   r`   r   r   rT   randomrandintr   randnasarrayr   describecodesargsorttaker   repeatassert_index_equalstackget_level_values)rE   data	exp_indexexpectedr   cat1cat2rD   gbexp_idxmsgrk   gr   rI   gbcresult2result3result4result5levelsr   groupeddesc_resultidx
ord_labelsord_dataexp_catsexpcexpr   r   r   
test_basicU   s   


"
$"



r   c                 C   s   t tdddttddgtdgdgd dgd  tdgd	d
gdd}|jd	g| d}t tdddttddgtdgdgd tdgd	d
gdd}|d}t	|| d S )NrN      r   rH   rt   r   rP   rM   Index1Index2)r   r   r   r   r!   levelr?      )
r   r   r   r   r   rangerA   	get_groupr   r   )r?   rD   r   r   r   r   r   r   test_level_get_group   s&   
r   c                  C   s   t dgd dgd  g dd tdd} t| jg dd	d
| _| dd  }|jdd	d}g d}t|g dd	d
}g dt|g}t	j
|ddgd}tdgd |dd}t|| d S )Nr^      r_   )highmedlowr:   g      (@)r<   doseoutcomes)r   r   r   Tr   r<   r   r   )r   sort_remaining)r   r   r   r   r   r   )r^   r^   r^   r_   r_   r_   r   rN   r!   rT   )r   r   r   r   r   rA   value_countsr   r   r   from_arraysr   r   r   rD   r   r!   r   r   r   r   (test_sorting_with_different_categoricals  s   
r   r   TFc           	      C   s6  t td| d}t tdddg| d}tt|}t|||d}|jdd	gd
d}tj||gdd	gd}tg d|dgd}t	j
tddd |dd }W d    n1 sZw   Y  t	|| | }t	|| |tj}t	|| tj||gdd	gd}td|d}|dd }t	|| d S )Nabcr   aaar   rH   r   )missingdenser`   r   r   Tr>   r   )r   rM          @r`   r!   rh   zSelect only validFr~   c                 S   ry   ri   )r   r-   rj   r   r   r   rl   0  r}   ztest_apply.<locals>.<lambda>rM   rU   c                 S   s   dS NrM   r   rj   r   r   r   rl   <  rm   )r   r   r   r   r   r   rA   r   r   r   r   r   rC   r   r-   aggr   r   )	r   r   r   r`   rD   r   r   r   r   r   r   r   
test_apply  s*   r   z.ignore:.*value of numeric_only.*:FutureWarningc                 C   s:  t g dg ddd}t g dg ddd}t||g dd}d	d
gd |d< |jg d| d}tj||d	d
gd gg dd}tdtg d|di }| }| sdt|||d	d
ggt	ddd}t
|| |jddg| d}tj||gddgd}tdg di|d}| }| st|||gt	ddd}t
|| t g dg dddg dg dd}t|}|jd| d}	|	 }tt	ddt	ddd }td!d!gd"d#gd$|d}| stt	ddt	ddd }
||
}t
|| |jdd%g| d}|d&}tg d't g dg dddg d(d)dd%g}| s-t||jjd*dggdd%g}t
|| d+D ]}|\}}||}||j|k|j|k@  }t
|| q5g d,g d-g d.d/}t|}t|d	 tdd0d1}||d2< |jd2d3gd4| d5}|d&}|jd2d3gd| d5}|d& }t
|| d S )6NrV   rW   Tr   rY   rZ   r\   r]   r   r   rN   r@   )r^   r_   r@   r>   r   r`   rU   ABCr   r   r^   r_   AB)r   rH   r   rH   r   rH   rI   rM   rM   rN   rN   rt   ru   rv   rw   )catintsvalr   abr   )rT   r   r   g      ?      4@rv   )r   r   r   r-   )      $@g      >@r   g      D@rM   rN   rM   rN   )r   r   r   rM   ))r   rM   )rH   rN   )rH   rM   )r   rN   )rt   r   r:   r   r:   rM   rM   rt   ru   rv   rw   2   <   F   )rK   rI   er   r   rK   rI   r   r   r   rt   rO   r   r   Fas_indexr?   )r   r   rA   r   r   r   r   r8   r"   r   r   r   r-   r   r   r   	set_indexr   r`   r   r   r   r   r   linspacereset_index)r?   r   r   rD   r   r   r   r   rK   groups_single_keyr!   groups_double_keykeyrI   ir   groupsgroups2r   r   r   test_observed@  s   	


	

r  c                 C   s   g dg dg dd}t |}t|d g d}d|_|j|dg| d	}tj|g dgddgd
}t g dg dd|d}| sOt||jg dgddg}|	d}t
|| d S )N)rO   rO   r:   rP   r\   )rt   r      "   )C1C2C3r  )rM   rN   rO   r   r   r  r>   r   )      @r
  g      @g      @)r   g      Y@g      i@g      A@)r  r	  rU   r-   )r   r   r   rT   rA   r   r   r"   r`   r   r   r   )r?   rK   rD   r`   r   r   r   r   r   r   r   test_observed_codes_remap  s   
r  c                  C   s   t tjjddddtjjddddtjjdddddd} | jtd| d< | jg d	d
d}| }|j	j
d  | j ksEJ |j	j
d  | j ksTJ |j	j
d  | j kscJ d S )Nr      i0u  r   '  )r   int_idother_idr   categoryr   )r   r  r  Tr>   rM   rN   )r   r   r   r   r   r   strrA   r%   r!   r   r1   r  r  )rD   r   r   r   r   r   test_observed_perf  s   "r  c                 C   s   t g dg dd}t|g dd}|jd| d}|j}| r0tdd	gd
dtdgd
dd}ntdd	gd
dtg d
dtdgd
dd}t|| d S )N)r   rI   r   r   r   rM   rN   rO   r   valsr   r>   r   rN   int64dtyperM   )r   rI   )r   r   rA   r  r   r   assert_dict_equal)r?   r   rD   r   r   r   r   r   r   test_observed_groups  s   "
r  c                 C   s   t tdtjdgg ddg dd}|jd| d}|j}| r*dtdd	gd
di}ntdd	gd
dtg d
dtg d
dd}t|| d S )Nr   )r   rH   rK   r  r  r  r   r>   r   rN   r  r  )	r   r   r   r   rA   r  r   r   r  )r?   rD   r   r   r   r   r   r   test_observed_groups_with_nan  s   

r  c                  C   s   t dtjtjgg dd} tg d}t| |d}|jdddd	 d
}t g dg dd}tdtjtjg|d	d}d|j_t	
|| d S )Nr   r   r  r  )r   serr   Fr>   r  r   rM   r   )r   r   r   r   r   rA   r0   r!   rT   r   r   )r   r  rD   r   r!   r   r   r   r   test_observed_nth   s   r  c                 C   s   t tjdtjdgg dd}tg d}t||d}|jd| d  }| r8tt dgg dddgd}ntt g dg dddtjtjgd}t	|| d S )	Nr   r   r  r\   )s1s2r  r>   rN   )
r   r   r   r   r   rA   r'   r   r   r   )r?   r  r   rD   r   r   r   r   r   #test_dataframe_categorical_with_nan  s   r!  r?   r   c           	      C   s   t g dg d| d}tg d}t||d}|jd||dd d}t|jjd	d
}t|j}|s:d|| < t||ksRd|  d| d| d| }J |d S )N)rK   r   rH   r   rK   rH   )r   rH   r   rK   r   )labelr   r"  )r?   r   r   r'   rq   r  r   zDLabels and aggregation results not consistently sorted
for (ordered=z, observed=z, sort=z
)
Result:
)	r   r   r   rA   	aggregater!   arrayisnar#   )	r   r?   r   r"  r   rD   r   aggrr   r   r   r   0test_dataframe_categorical_ordered_observed_sort#  s2   	
r'  c                  C   s|  t jddd} tjjdddd}tj|| dd}ttjdd}|j	|d	d

 }|j	t|d	d

 }|| }t|j|jdd|_t|| |j	|d	d
}| }|j }||}	||}
|
j	|	d	d
 }t|| t|j|j t|jd|jd tjtdd| dd}t|}t| jd| tg dd }t| jd| d S )Nz
2014-01-01r:   )periodsr   r   r   Tr   Fr>   r   r   r   rM   )r   
date_ranger   r   r   r   r   r   r   rA   r-   r   r   r   r!   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   rE   r   r   r   r   r   r   r   r   r   r   r   r   r   test_datetimeF  s6   




r*  c                  C   s  t jd} g d}| jdddd}tj||dd}tt t dd	d	dt
d
d}||d< |djddd }|t
d
 j|jdd }ttjg d|dddd|_t|| |jddd }|t
d
 j|jdd }ttjg d|dddd|_t|| d S )Ni90  r   r   r:   ru   r   Tr   rR   rg   rE   Fr   r>   r   rM   rN   rO   rp   )r   r   RandomStater   r   r   r   r   r   reshaper   r   rA   r8   r   r   r!   r   r   )sr   r   rE   rD   r   r   r   r   r   test_categorical_indexm  s$   &r0  c                  C   sn   t g dg ddd} ttjdd| d}|g dd	  }t|	 j
|  t|	 j
j| j d S )
N)r   r   r   r   r   Tr   ru   r:   rg   r\   rP   )r   r   r   r   r   rA   r   r   r   r   rh   assert_categorical_equalr`   )rE   rD   r   r   r   r   !test_describe_categorical_columns  s   r2  c                  C   s   t tdddgd tdd d} | d d	| d< | jd
dgddd   }| }tddgddd}t	
|j| t	|jj|j |d |d  }tddgtddgd
dd}t	|| d S )Nrt   r^   r_   rP   XYXXYrN   )r   mediumartistr4  r  r5  Fr>   r   r   rT   r   r:   XYrp   rU   )r   r   r   r   rA   r%   unstackr   r   r   r   rh   r1  r`   r   r   r   )rD   gcatr   exp_columnsr   r   r   r   test_unstack_categorical  s   r<  c                  C   st   t tjtjddddddddg
} t|  jd}tjt	dd | 
|  W d    d S 1 s3w   Y  d S )NrM   rN   rO   r:   z$Grouper and axis must be same lengthrc   )r   r   r   r   r   dropnar`   pytestraises
ValueErrorrA   r-   )seriesrx   r   r   r   test_bins_unequal_len  s
    "rB  rA  r   r:   rO   rM   rN   r^   r_   c                 C   s   | d S r   r   )r   r   r   r   rl     s    rl   ra   c                 C   sD   |  ttddd}|t}t|t| d}t|| d S )NABBAr  r  rU   )rA   r   r   r#  r   keysr   r   )rA  r   rA   r   r   r   r   r   test_categorical_series  s   
rF  c                     s  t tg dg dg dg dd  jddgdd	d
 } t tddg jjjdddgddgdg dd}t| |  fdd} jd|gdd	d
 } t tddg jjjdddgddgdg dd}t| | tg ddd} jd|gdd	d
 } t| | ddg}t tddg jjjdddgddgdg dd}dD ]}t	t
d|d _ j|dd	d
 } t| | qd S )NrM   rN   rN   r  )rt      rH  )e   f   g   )r   r^   r_   r   r^   FTr   rM   rN   r  rt   rH  rI     rg   c                    s    j | df S )Nr^   )loc)rrD   r   r   rl     r   ztest_as_index.<locals>.<lambda>r   )r   rH   rH   rp   )Nr7  r_   r   )r   r   rA   r8   r   r   r   r   r   r   r   r!   )r   r   r   r/  group_columnsrT   r   rO  r   test_as_index  sR   	rQ  c                  C   s   t d} tdtt d| ddi}t| | ddd}t|jdddd j| t|jdddd j| tdtt d| ddi}t| | ddd}tt d	t d	ddd}t|jdddd j| t|jdddd j| d S )
Nr   r^   baTr   r6  Fr   bac)	r   r   r   r   r   r   rA   r'   r!   )r   rD   r!   r   nosort_indexr   r   r   test_preserve_categories   s$   rU  c               	   C   s  t g dg dttdtdddttdtdddd} t d	d
tjgddtjgttdtdddttdtdddd}dD ]D}d}tjt|d | j|ddd	 }| j|ddd	 
 }W d    n1 spw   Y  |j|jd}t|| t|| qDd S )N)rM   rN   rM   rM   rN   )rt      r      r  abaabrS  Fr   T)r^   r_   r  r  r   g      ?g      9@r   )r  r  rb   rc   )byr   r?   rg   )r   r   r   r   r   r   r   r   rA   r-   r   r   rh   r   )rD   exp_fullcolr   result1r   r   r   r   r   test_preserve_categorical_dtype  s2   	

r]  zfunc, valuessecondfourththirdc                 C   s   t g ddd}tg d|d}|d}t||  }tddgt||jd	dd}t|| |dd
 }t||  }|d
 }t	|| d S )N)r'   r^  r`  r_  Tr   )r+  r+  ra  )payloadr[  rb  ra  r+  r  r[  )
r   r   rA   getattrr   r  r   r   r   r   )funcr`   rI   rD   r   r   r   sgbr   r   r   test_preserve_on_ordered_ops9  s   
rf  c                  C   sP  t tjd} tg d}tj|g ddd}| j|dd }| j|dd }t	|j
|j|jd|_
t|| tg d	}tj|g d
dd}| j|dd }| j|dd |j}t	|j
|j|jd|_
t|| tg dg ddd}tg d|d} | jddd }|d j}tdddtjg}t|| d S )N	   )	r   r   r   rM   rM   rM   rN   rN   rN   r   rM   rN   Tr   Fr>   r   )	r   r   r   rM   rM   rM   rO   rO   rO   r,  rG   rJ   rL   rQ   rH   r   rM   rN   r:   )r   r   r   r   r$  r   r   rA   r-   r   r!   r   r   r   r   r   r   r`   r   assert_numpy_array_equal)r   r   rE   r   r   r   r   r   test_categorical_no_compressU  s6   
rj  c                  C   sb   t d gd tg dd} | d d }ttg ddgdtg d	dd
dd}t|| d S )NrO   )trainrk  testrC  r^   r_   rl  rk  r  rq   r  rT   r   )r   r   rA   r'   r   r   r   rD   r   r   r   r   r    test_groupby_empty_with_categoryz  s   ro  c                  C   s   t dtjdddi} dd tdddD }t||}| jdgdd	} tj| j	tdd
dd|d| d< | j
dgddd  }|t|jdd d }t|j|jjd|_t|| d S )Nvaluer   r  r   c                 S   s   g | ]}| d |d  qS )z - i  r   ).0r  r   r   r   
<listcomp>  s    ztest_sort.<locals>.<listcomp>i  T)rY  	ascendingi)  F)rightr   value_groupr>   c                 S   s   t |  d S )Nr   )floatsplitrj   r   r   r   rl     s    ztest_sort.<locals>.<lambda>)r   rp   )r   r   r   r   r   r   sort_valuesr   r   rp  rA   r%   sortedr!   r   rT   r   r   )rD   r   
cat_labelsresr   r   r   r   	test_sort  s   

r|  c               	   C   s  t g dg dg dg dg dg dg dgg dd	} t| d
 dd| d
< tg dd
dd}t ddgddgddgddggddg|d}d
}| j|ddd }t|| |}| j|ddd }t|| t| d
 dd| d
< tg dd
d}t ddgddgddgddggddg|d}tg dg dd
d}t ddgddgddgddgg|ddgd}d
}| j|ddd }t|| | j|ddd }t|| d S )N)	(7.5, 10]rt   rt   )r}  r   ru   )(2.5, 5]rP   rv   )(5, 7.5]r   rw   )r~  r:   r   )(0, 2.5]rM   r   )r  ra   r   )r   r   r   rg   r   Tr   )r  r~  r  r}  rS   rM   r   rP   rv   r   rw   rt   r   r   )rh   r!   Fr   rp   )r}  r~  r  r  r   rT   r   )r   r   r   rA   r'   r   r   )rD   r!   expected_sortr[  result_sortexpected_nosortresult_nosortr   r   r   
test_sort2  sX   	
"""r  c                  C   s  t tdddtdddtdddtdddtdddtdddtdddgg dg ddg dd	} t| d
 dd| d
< tdddtdddtdddtdddg}t ddgddgddgddggddgd	}t|d
dd|_tdddtdddtdddtdddg}t ddgddgddgddggddgd	}t||d
dd|_d
}t|| j|ddd  t|| j|ddd  t| d
 dd| d
< tdddtdddtdddtdddg}t ddgddgddgddggddgd	}t|d
d|_tdddtdddtdddtdddg}t ddgddgddgddggddgd	}t||d
d|_d
}t|| j|ddd  t|| j|ddd  d S )Ni  ra   rM   rN   rP   )rt   r   rP   r   r:   rM   ra   r   )dtr   r   rg   r  Tr   r   rv   r   rw   rt   r   r   rS   )r   rT   r   Fr   rp   r  )	r   r   r   r   r!   r   r   rA   r'   )rD   r!   r  r  r[  r   r   r   test_sort_datetimelike  s   






	



 



 



 



 r  c                  C   s  t tg dg ddg dd} tg ddd}| jddd	j }tg d
|dd}t|| | jddd	jjdd}tg d
|dd}t|| | jddd	jjdd}tddt	j
g|dd}t|| | jddd	jjdd}tdt	j
t	j
g|dd}t|| d S )Nr   r   rH   r   r  rM   rN   rM   rC  r^   rp   Fr>   )rO   rM   r   r_   r   	min_countrM   rO   rN   )r   r   r   rA   r_   r8   r   r   r   r   r   rD   expected_idxr   r   r   r   r   test_empty_sum4  s    r  c                  C   s   t tg dg ddg dd} tg ddd}| jddd	j }tg d
|dd}t|| | jddd	jjdd}tg d
|dd}t|| | jddd	jjdd}tddt	j
g|dd}t|| d S )Nr  r   r  r  rC  r^   rp   Fr>   )rN   rM   rM   r_   r   r  rM   rN   )r   r   r   rA   r_   r2   r   r   r   r   r   r  r   r   r   test_empty_prodP  s   r  c                  C   s   t ttdtttjddddd tdd} | dd	g }t	j
tg d
ttjddddgdd	gd}t ddddddddtjdg	i|d}t|| d S )N	abcbabcbaz2018-06-01 001TrO   )freqr(  rg  )key1key2r`   r  r  r   r   r`   r   r:   r   rP   r   rN   rU   )r   r   r   r   r)  r   r   rA   r-   r   r   r   r   r   )rD   r   r   r   r   r   r   ,test_groupby_multiindex_categorical_datetimeh  s"   
	
$r  zas_index, expectedrM   rM   rN   r  r  rG  r   rH   r   r  rk   )r!   r   rT   r   rH   rk   c                 C   sP   t tg dddg dg dd}|jddg| d	d
d  }t|| d S )Nr  r  r  rG  r  r  r   rH   Tr   rk   )r   r   rA   r8   r   assert_equal)r   r   rD   r   r   r   r   ,test_groupby_agg_observed_true_single_column  s
   r  r   c                 C   sJ   t g dg ddd}t g dg ddd}|jd| d}t|| d S )NrJ   Fr   )Nr   rH   rI   rM   r   )r   shiftr   r  )r   ctr   r{  r   r   r   
test_shift  s   r  c                 C   sX   |   dd }|d d|d< |d d|d< tg d|d< |jdgd	d
}|S )a  
    DataFrame with multiple categorical columns and a column of integers.
    Shortened so as not to contain all possible combinations of categories.
    Useful for testing `observed` kwarg functionality on GroupBy objects.

    Parameters
    ----------
    df: DataFrame
        Non-categorical, longer DataFrame from another fixture, used to derive
        this one

    Returns
    -------
    df_cat: DataFrame
    Nr:   r^   r  r_   r\   r@   rB   rM   r   )r   r   r   drop)rD   df_catr   r   r   r    s   r  	operationr   rC   c                 C   s   t g d| d jdd}t g d| d jdd}t||g}tg d|dd}| jddgd	d
d }t||t}t	|| d S )N)r   r   r   r   r^   rm  )onetwor  threer_   )rM   rO   rN   r:   r@   r   r!   rT   Tr>   )
r   r  r   r   r   rA   rc  r8   r   r   )r  r  lev_alev_br!   r   r   r   r   r   r    test_seriesgroupby_observed_true  s   r  c                 C   s   t jtddgddtg dddgddgd \}}td	d
tjdtjdg|dd}|dkr5|jddd}| jddg|dd }t	||t
}t|| d S )Nr   r   Fr   r  r  r  r^   r_   r   rN   r:   rM   rO   r@   r  r   r   infer)downcastr>   )r   r   r   	sortlevelr   r   r   fillnarA   rc  r8   r   r   )r  r?   r  r!   _r   r   r   r   r   r   )test_seriesgroupby_observed_false_or_none  s   r  zobserved, index, datar   r   r^   rm  )r  r  r  r  r  r  r  r  r_   )rM   rM   rO   rO   rN   rN   r:   r:   r   r  )r^   r_   Nc                 C   s>   t ||dd}| jddg|dd dd }t|| d S )Nr@   r  r^   r_   r>   c                 S   s   |   |  dS )Nr/   r,   r  rj   r   r   r   rl     s    z8test_seriesgroupby_observed_apply_dict.<locals>.<lambda>)r   rA   rC   r   r   )r  r?   r!   r   r   r   r   r   r   &test_seriesgroupby_observed_apply_dict  s
   .r  c                 C   s<   |  ddgd  }|  ddg d }t|| d S )Nr^   r_   r@   )rA   r-   r   r   )r  r   r   r   r   r   4test_groupby_categorical_series_dataframe_consistent   s   r  code)rM   r   r   )r   r   r   c                 C   sf   t g dg dg dd}tj| tdd}|j|dd }|jj|d	d j}t|| d S )
Nr\   )r+  ra  )rP   r   ra   r   r   r   r  rM   r   r   )	r   r   r   r   rA   r-   Tr   r   )r  rD   r   r   r   r   r   r   test_groupby_categorical_axis_1'  s
   r  z(ignore:.*Select only valid:FutureWarningc                 C   s\   t tddg|dddgdddgd	}| }|jd| d
jt jdd }t|| d S )NBobGregr   rM   rN   )NameItemr  r  rg   r>   T)skipna)	r   r   r   rA   r   r8   r   r   r   )r?   r   rD   r   r   r   r   r   $test_groupby_cat_preserves_structure1  s   r  c                  C   s^   t g dtdd} tjtdd | ddd  W d    d S 1 s(w   Y  d S )	NrV   r:   r9   r   z'vau'rc   r9   c                 S   s&   t | jd d g| jd d gdS )Nr+  r9   vaur  )r   ro   )rowsr   r   r   rl   H  s     z/test_get_nonexistent_category.<locals>.<lambda>)r   r   r>  r?  KeyErrorrA   rC   rO  r   r   r   test_get_nonexistent_categoryC  s   
"r  c                 C   s   | dkr	t d | dkrt jjdd}|j| | dkr tnd }ttt	dt	dd	tt	d
d t	dd	dgd d}t
| |}|rHdnd}|jddg|dd }t|| }	tj|dd |	| }
W d    n1 spw   Y  t|
|ks}J d S )Nngroupngroup is not truly a reductionr&   6TODO: implemented SeriesGroupBy.corrwith. See GH 32293reasonr+   AABBABCDr  r   rN   皙?r:   cat_1cat_2rp  rV  r  r  r>   rp  The 'mad' method is deprecatedrc   )r>  skipmarkxfailnode
add_markerr   r   r   r   r
   rA   rc  r   r   r   )reduction_funcr?   requestr  warnrD   r    expected_lengthseries_groupbyr   r   r   r   r   0test_series_groupby_on_2_categoricals_unobservedN  s,   



r  c                 C   sl  | dkr	t d | dkrt jjdd}|j| | dkr tnd }ttt	dt	dd	tt	d
d t	dd	dgd d}t
dt
dt
dt
dt
dg}t| |}|jddgddd }t|| }tj|dd || }	W d    n1 s{w   Y  t|  }
|D ]}|	j| }t|
rt|s||
ksJ q|
dkr| dkrt|	jtjsJ d S d S d S )Nr  r  r&   r  r  r+   r  r   r  r   rN   r  r:   r  ACBCCACBCCr  r  Fr>   rp  r  rc   r   r8   )r>  r  r  r  r  r  r   r   r   r   tupler
   rA   rc  r   r   -_results_for_groupbys_with_missing_categoriesrM  r   r%  r   
issubdtyper  integer)r  r  r  r  rD   
unobservedr    r  r   r   zero_or_nanr   r   r   r   r   ?test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nansm  s<   
"



 r  c                 C   s   | dkr	t d | dkrtnd }tttdtddttdtddg d	d
}g d}|jddgdd}t| |}tj	|dd t
|| | }W d    n1 sVw   Y  |D ]	}||jvsfJ q]d S )Nr  2ngroup does not return the Categories on the indexr+   r  r   r  111112r  r  r  r  r  )r^   2)r_   r  )r@   1)r@   r  r  r  Tr>   r  rc   )r>  r  r   r   r   r   rA   r
   r   r   rc  r!   )r  r  rD   unobserved_catsdf_grpr    r{  r   r   r   r   >test_dataframe_groupby_on_2_categoricals_when_observed_is_true  s$   

r  c           	      C   s  | dkr	t d | dkrtnd }tttdtddttdtddg d	d
}g d}|jddg|d}t| |}tj	|dd t
|| | }W d    n1 sVw   Y  t|  }|tju rs|j|    sqJ d S |j| |k  sJ d S )Nr  r  r+   r  r   r  r  r  r  r  r  r  r  r>   r  rc   )r>  r  r   r   r   r   rA   r
   r   r   rc  r  r   r   rM  isnullr#   )	r  r?   r  rD   r  r  r    r{  r   r   r   r   ?test_dataframe_groupby_on_2_categoricals_when_observed_is_false  s&   	


r  c                  C   s   g dg dg dd} t | }t|d tddd}||d	< |jd	d
gddd}|d d}|dd }t|| d S )N)rt   r   r:   rM   r   )rK   rI   rK   rI   r   r   r   ru   rP   r   r   T)r   r   r-   )	r   r   r   r   r   rA   r   r   r   )rK   rD   r   r  r   r   r   r   r   3test_series_groupby_categorical_aggregation_getitem  s   r  zfunc, expected_valuesc                 C   sb   t g dg dtg ddd}|d| }t d|itg ddd	d
}t|| d S )Nr   )r   rM   rM   rN   rN   )r   r   r   r   rM   )idr  rp  r  r  rp  rh  rp   rU   )r   r   r   rA   r   r   r   r   )rd  expected_valuesrD   r   r   r   r   r   $test_groupby_agg_categorical_columns  s   
r  c                  C   sx   t dtg dg ddi} t dddgiddgd}| g dtj}t|| | g d }t|| d S )	Nr^   r  r   r  rN   rM   rU   r  )r   r   rA   r   r   r1   r   r   rD   r   r   r   r   r   test_groupby_agg_non_numeric  s   r  rd  c                 C   sl   t dgtdgddj d}|dd }t||  }tdgtdgddd|d jd	}t	|| d S )
Ni  rH   r  r  rC  r^   r_   rp   r!   rT   r  )
r   r   r   
as_orderedrA   rc  r   r  r   r   )rd  rD   
df_groupedr   r   r   r   r   <test_groupby_first_returned_categorical_instead_of_dataframe  s    r  c                  C   sz   t ddg} d| j_tg dtg dt| dd}tdd	d
gitddgddd}|jddd	 }t
|| d S )NrM   rN   F)rM   rO   rP   ra   r   r  rQ   r   r   g      @rH   rp   r   r   )r   r$  flags	writeabler   r   r   r   rA   r-   r   r   )rE   rD   r   r   r   r   r   test_read_only_category_no_sort  s   r  c                  C   s   t g dg dd} | d djjg ddd| d< t d	d	d
d	dd
ddd
ddd
d}|jddd}tg dg ddddd|_| ddg 	 }t
|| d S )N)smalllarger   r   r4  r   r   r4  )r@   r^   r^   r@   r^   r@   r^   r@   )r   r   r   r  )tinyr  r4  r   Tr   r   )r^   r@   rM   rO   rN   r   r!   r   )r   r   rT   r  )r   r   r   set_categoriesrename_axisr   rh   rA   r5   r9  r   r   r  r   r   r   #test_sorted_missing_category_values#  s6   
r  c                  C   s   t dg di} | d d| d< | dj }tg dtg dddd| d jd}t	|| | d
ddi}| }t|| d S )	Ncol_num)rM   rM   rN   rO   r  col_catr  rp   r  r'   )r   r   rA   r  r'   r   r   r  r   r   r   to_framer   rn  r   r   r   1test_agg_cython_category_not_implemented_fallbackP  s   r  c                  C   s   t g dg dddtjdgg dg dd} | dd	i} | d
dgdd }tjddgddggdd}t ddgddgddgd|d}t	|| d S )N)rM   rM   rM   rM   r   r  g?g333333?)r   r   r   fee)r^   r_   numerical_col
object_colcategorical_colr  r  r^   r_   c                 S   s   |    S ri   )r%  r8   rO  r   r   r   rl   t  r   z7test_aggregate_categorical_with_isnan.<locals>.<lambda>rM   rN   rC  r   r   )r
  r  r  r   )
r   r   r   r   rA   r   r   r   r   r   r   r   r   r   %test_aggregate_categorical_with_isnanf  s&   
r  c                  C   s   t g dg dd} tjg ddd}| d || d< | dd t| d	< |  }t g dg dg d
d}|d ||d< |d	 ||d	< t	|| d S )N)rM   rM   rM   rN   rN   rO   )WaitingOnTheWay	Deliveredr  r  r  )
package_idstatus)r  r  r  Tr   r  r  last_status)r  r  r  r  r  r  )r  r  r  )
r   r   CategoricalDtyper   rA   r   r,   r   r   r   )rD   delivery_status_typer   r   r   r   r   test_categorical_transform  s(   r  c                 C   s   t g d}g d}t|||d}t ddg}tj||gddgd}tdtjtjdg|d	d
tdtjtjdg|d	d
d}||  }|rL| tj	}|j
ddg|dd	 }	t|	|  }
t|
| d S N)r   r   rM   rM   )r   rM   rM   r   r   r   rM   r   rH   r   rI   rp   )r'   r*   r>   )r   r   r   r   r   r   NaNr=  r   r  rA   rc  r   r   )rd  r?   r   r   rD   r   r   expected_dictr   srs_grpr   r   r   r   Ftest_series_groupby_first_on_categorical_col_grouped_on_2_categoricals  s   r  c                 C   s   t g d}g d}t|||d}t ddg}tj||gddgd}tdtjtjdg|d	d
tdtjtjdg|d	d
d}||   }|rN| 	tj
}|jddg|d}	t|	|  }
t|
| d S r  )r   r   r   r   r   r   r  r  r=  r   r  rA   rc  r   r   )rd  r?   r   r   rD   r   r   r  r   r  r   r   r   r   Btest_df_groupby_first_on_categorical_col_grouped_on_2_categoricals  s   r  c                  C   s   t tg dg ddtdd} | jddd}|j}tjd	d
gddtjdgddtjg ddd}| | ks=J | D ]}t	|| ||  qAd S )N)rH   rH   r   r   r  rO   )r   r[  r   Fr  r   rM   intpr  rN   )rH   r   rI   )
r   r   r   rA   indicesr   r$  rE  r   ri  )rD   r   r   r   r   r   r   r   2test_groupby_categorical_indices_unused_categories  s   r  c                 C   sj   t dg di}|d d|d< t|dd |  }ttg ddtg dddd}t|| d S )Nr   r  r  rH   rp   )rT   r!   )	r   r   rc  rA   r   r   r   r   r   )rd  rD   r   r   r   r   r   1test_groupby_last_first_preserve_categoricaldtype  s   r   c               	   C   s   t ddgddgddgd} | jdddd} | jd	d
gdd d }tddgttddgd	dtddgd
dgdd}t	|| d S )NrM   rN   rt   rH  r   r  rQ   r  r   rH   Tr>   rI   rp   r   )
r   r   rA   r1   r   r   r   r   r   r   rn  r   r   r   )test_groupby_categorical_observed_nunique  s   r!  c                  C   s   t jddgdd} tddgddgddggddgd	d| i}|dd  }tddgtddgdd
dt jddgddd}t	|| d S )Nr  bigTr   rM   rN   grpdescriptionrg   rp   r  )
r   r  r   r   rA   r,   r   r   r   r   )r  rD   r   r   r   r   r   ,test_groupby_categorical_aggregate_functions  s   
r%  c                 C   s   t ddgg dd}tt ddgg ddddgd}|jd| |d	}| }| r4td
ddgi|d}ntg dg d}td
g di|d}d|j_t|| d S )NrM   rN   r  r  rO   r:   )rk   r[   rk   )r?   r=  r[   rU   )rO   r:   r   )	r   r   rA   r8   r   r!   rT   r   r   )r?   r=  r   rD   r   r   r   r!   r   r   r   test_groupby_categorical_dropna)  s   "r&  )`r   numpyr   r>  pandasr   r   r   r   r   r   r   r	   pandas._testing_testingr   pandas.tests.groupbyr
   r  r"   r  rF   r   r   r   r  parametrizer   filterwarningsr  r  r  r  r  r  r!  r'  r*  r0  r2  r<  rB  r   renamerF  rQ  rU  r]  rf  rj  ro  r|  r  r  r  r  r  r   r  r   NaTr  fixturer  r  r  r   r  r  r  r  r  r  r  r  r  r  r1   r%   r  r  r  r  r  r  r  r  r  boolr  r  r  r   r!  r%  r&  r   r   r   r   <module>   s   $		
! 

%
l '
$

8




	%:[




"
"
,	

	
-
#

-5
