U
    sVc*                     @   sf  d dl m Z  d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ ejfddZejejd ejejejejejejejejejejejd ejejejd ejejd ejdZdd	 Zd
d Zdd Zdd Zejdddgdd Zejddd Zdd Zdd Zdd Z dd Z!d d! Z"d"d# Z#ejdddgejd$ddgejd%ddgd&d' Z$d(d) Z%d*d+ Z&d,d- Z'd.d/ Z(d0d1 Z)ejd2d3gee*d4d d5gd6d7gd8fee*d4+d9d: d7gd d6gd8fee*d;d d5gd6d7gd8fgd<d= Z,d>d? Z-d@dA Z.dBdC Z/ejdDdEdFdEgfdGdHdIgfdJdHdEgfdKdFdIgfgdLdM Z0dNdO Z1dPdQ Z2dRdS Z3dTdU Z4dVdW Z5dXdY Z6dZd[ Z7d\d] Z8ejd^dee
j9ed6d6d7gd_d`d6d7d7ggdadbgdcd6d7d5gdddefdeed6d6d7gd_d`d6d7d7gd6d7d5gdffgdgdh Z:ejdidej;ej<gdjdk Z=ej>dldm Z?ejdndodpgdqdr Z@ejdndodpgejd$ddgdsdt ZAejdude
9e	dvgd4 dwgd4  d_dxdye	dzdzd{d{dzdzd|d|gd_d}dye	dJdKgd4 gd6d6d5d5d7d7d4d4gfde
jBedwdvgdd~edzd|d{gdd~e	dJdKggdxd}dgdcd7d7d4d4ej;ej;d6d6ej;ej;d5d5gfde
jBedwdvgdd~edzd|d{gdd~e	dJdKggdxd}dgdcd7d7d4d4ej;ej;d6d6ej;ej;d5d5gfgdd ZCdd ZDejdd6d d gd d d ggdd ZEejddd ZFdd ZGdd ZHdd ZIdd ZJejd$ddgdd ZKdd ZLejdejMd6d6d7gfejNd6d7d7gfgdd ZOdd ZPejddEdGgdd ZQdd ZRdd ZSdd ZTdd ZUdd ZVejddEdGgeWeXdddZYejddEdGgeWeXdddZZdd Z[ejddEdGgdd Z\dd Z]dd Z^dd Z_dS )    )datetimeN)CategoricalCategoricalIndex	DataFrameIndex
MultiIndexSeriesqcut)get_groupby_method_argsc                 C   s.   dd }t jt|||d}| j||d S )zpReindex to a cartesian production for the groupers,
    preserving the nature (Categorical) of each grouper
    c                 S   s4   t | ttfr0| j}tjtt||| jd} | S )N
categoriesordered)	
isinstancer   r   r   
from_codesnparangelenr   )ar    r   I/tmp/pip-unpacked-wheel-xj8nt62q/pandas/tests/groupby/test_categorical.pyf   s      z)cartesian_product_for_groupers.<locals>.fnames
fill_value)r   from_productmapreindex
sort_index)resultargsr   r   r   indexr   r   r   cartesian_product_for_groupers   s    r"   )allanycountcorrwithfirstZidxmaxZidxminlastmadmaxmeanZmedianminnthnuniqueprodZquantileZsemsizeZskewstdsumvarc                 C   sB   t | jd}dd }| j|ddj|}|jjd dks>td S )N   c                 S   s   |   |  |  |  dS )Nr,   r*   r%   r+   r5   )groupr   r   r   	get_statsI   s
    z2test_apply_use_categorical_name.<locals>.get_statsFobservedr   C)r	   r:   groupbyDapplyr!   r   AssertionError)dfcatsr7   r   r   r   r   test_apply_use_categorical_nameF   s    rA   c               
   C   s  t dddddddddg	ddddgdd} tddddddd	d
dg	| d}ttdddd}tdddd
tjgi|d}|jddd }t	|| t ddddgdddgdd}t ddddgdddgdd}t||ddd	d
gd}|jddd}tdddgddd}	tdt
d	ddg|	di}d}
tjt|
d | }W 5 Q R X t	|| tddgddgddggddgd}t |j|d< |jdgdd}|d d! }t	||dg  |d}|jddg }t	|| d"d# }||}|jddg  }tddgdd$|_|d d%|d< t	|| tddd&d'gi}tj|jdd(d)d*d+gd,}|jj|ddt}t||d  t|jj|ddd-d! |d  t	|j|ddt|dg  |j|dd}tjtd.dd/ |d0d! }W 5 Q R X t	||dg  td : |d1d! }|t}|tjj}|d2d! }W 5 Q R X tj	||dg dd3 tj	||dg dd3 t	||dg  t	||dg  t|jj|ddtj|d  t	|j|ddtj| tddd&d'd4gi}tj|jd5dd(d)d*d+gd,}|jj|ddt}t||d  t|jj|ddd6d! |d  t	|j|ddt|dg  t	|j|ddd7d! |dg  tdddddgi}tj|jdddd	d
gt tdd8}|j|ddt }t|j!j"|j!j#d9}t
ddddg|d}d|j_$t|| d:d;d<d=g}tj%j&dd
d>d?}t j'||dd9} ttj%(d>d
}|j| dd }|jt)| dd }t|| j"dd}	|*|	}t	|| |j| dd}|+ }| j,- }t)| .|}|.|}t |dd:d;d<d=gd@}|j|dddA+ }t	|| t j't/d
0dB|dd9}t|}t1|2 j3d| tdCdDdEdFdGdHdIdJgd
 }t1|2 j3d| d S )KNr   bcdTr            r4      r   rB   abcdnamer   r!   Fr8   zyABvaluesrQ   rS      r   !The default value of numeric_onlymatchzJohn P. Doez	Jane DoveZ	person_idperson_namecolumnsc                 S   s   | S Nr   xr   r   r   <lambda>x       ztest_basic.<locals>.<lambda>c                 S   s   |  djd S )NrX   r   )drop_duplicatesilocr\   r   r   r   r      s    ztest_basic.<locals>.frL   object      
         (   )binsc                 S   s
   t | S r[   r   r2   xsr   r   r   r^      r_   z
scalar maxrW   Zcheck_stacklevelc                 S   s
   t | S r[   r   r*   rl   r   r   r   r^      r_   c                 S   s   t j| ddS )Nr   Zaxisro   rl   r   r   r   r^      r_   c                 S   s   t j| S r[   )r   maximumreducerl   r   r   r   r^      r_   )Zcheck_dtypeic                 S   s
   t | S r[   rk   rl   r   r   r   r^      r_   c                 S   s
   t | S r[   rk   rl   r   r   r   r^      r_   )labelsr   foobarbazquxd   r0   )r   r   sortr9      r%   r+   r1   r,   25%50%75%r*   )4r   r   r   listr   nanr;   r+   tmassert_frame_equalr   assert_produces_warningFutureWarningr2   rX   	transformr`   ra   r=   copyr   r!   astypepdcutr   assert_series_equalr*   rq   rr   filterr#   r   rS   r   r   rL   randomrandintr   randnasarrayr   describecodesargsorttaker   repeatassert_index_equalstackget_level_values)r@   data	exp_indexexpectedr   cat1cat2r?   gbZexp_idxmsgr]   gr   rC   Zgbcresult2Zresult3Zresult4Zresult5levelsr   groupeddesc_resultidx
ord_labelsord_dataZexp_catsexpcexpr   r   r   
test_basicU   s    
 

 "  
$ " $


  
r   c                 C   s   t tdddttddgtdgdgd dgd  tdgd	d
gdd}|jd	g| d}t tdddttddgtdgdgd tdgd	d
gdd}|d}t	|| d S )NrF      r   rB   rf   r   rH   rE   ZIndex1ZIndex2)r   r   r   r   r!   levelr9      )
r   r   r   r   r   ranger;   	get_groupr   r   )r9   r?   r   r   r   r   r   r   test_level_get_group   s&    
r   c                  C   s   t dgd dgd  dddgd tdd	} t| jdddgd
d| _| dd  }|jdd
d}ddddddg}t|dddgd
d}ddddddgt|g}t	j
|ddgd}tdgd |dd}t|| d S )NrQ      rR   highZmedlowr4   g      (@)r6   doseZoutcomesTr   r6   r   r   )r   Zsort_remainingr   rF   r!   rL   )r   r   r   r   r   r;   Zvalue_countsr   r   r   from_arraysr   r   r   r?   r   r!   r   r   r   r   (test_sorting_with_different_categoricals  s    r   r   TFc           	   	   C   s$  t td| d}t tdddg| d}tt|}t|||d}|jdd	gd
d}tj||gdd	gd}tdddg|dgd}t	j
tddd |dd }W 5 Q R X t	|| | }t	|| |tj}t	|| tj||gdd	gd}td|d}|dd }t	|| d S )Nabcru   Zaaar   rB   r   )missingdenserS   r   r   Tr8   r   r   rE          @rS   r!   rZ   zSelect only validFrn   c                 S   s
   t | S r[   )r   r+   r\   r   r   r   r^   0  r_   ztest_apply.<locals>.<lambda>rM   c                 S   s   dS NrE   r   r\   r   r   r   r^   <  r_   )r   r   r   r   r   r   r;   r   r   r   r   r   r=   r   r+   aggr   r   )	r   r   r   rS   r?   r   r   r   r   r   r   r   
test_apply  s,      r   z.ignore:.*value of numeric_only.*:FutureWarningc              	   C   s  t ddddgdddgdd}t ddddgdddgdd}t||d	d
ddgd}ddgd
 |d< |jdddg| d}tj||ddgd
 gdddgd}tdtd	d
ddg|di }| }| st|||ddggt	ddd}t
|| |jddg| d}tj||gddgd}tdd	d
ddgi|d}| }| sRt|||gt	ddd}t
|| t ddddgdddgddd	d	d
d
gddddgd}t|}|jd | d}	|	 }tt	d!d t	d"dd#}td$d$gd%dgd&|d}| s
tt	d"d t	d"dd#}
||
}t
|| |jd d'g| d}|d(}td)d*d%d+gt ddddgdddgddd	d
d	d
gd,d d'g}| st||jjd	d
ggd d'g}t
|| d-D ]<}|\}}||}||j|k|j|k@  }t
|| qdd.dd.dd	d	gddddd/d0d1gddd2ddddgd3}t|}t|d tddd}||d4< |jd4d5gd6| d7}|d(}|jd4d5gd| d7}|d( }t
|| d S )8Nr   rB   rN   Tr   rC   rD   rO   rE   rF   rG   r4   rP   rv   rw   r:   rQ   rR   r8   r   rS   rM   ABCr   r   ABrf   rg   rh   ri   )catintsvalr   abr   )rL   r   r   g      ?      4@)r   r   r   r+         $@g      >@g      D@)r   r   r   ))r   rE   )rB   rF   )rB   rE   )r   rF   r~   2   <   F   erv   rw   rx   r   rx   Fas_indexr9   )r   r   r;   r   r   r   r   r2   r"   r   r   r   r+   r   r   r   	set_indexr   rS   r   r   r   r   r   linspacereset_index)r9   r   r   r?   r   r   r   r   rD   Zgroups_single_keyr!   groups_double_keykeyrC   ir   groupsZgroups2r   r   r   test_observed@  s    	        
  

      



  
	  

r   c                 C   s   ddddgddddgdddd	gd
}t |}t|d ddddg}d|_|j|dg| d}tj|ddddggddgd}t ddddgddddgd|d}| st||jddddggddg}|	d}t
|| d S )NrG   r4   rH   rE   rF   rf   rz      "   )C1C2C3r   r   r   r   r8   r   g      @g      @g      @r   g      Y@g      i@g      A@)r   r   rM   r+   )r   r   r   rL   r;   r   r   r"   rS   r   r   r   )r9   rD   r?   rS   r   r   r   r   r   r   r   test_observed_codes_remap  s$    $   
r   c                  C   s   t tjjddddtjjddddtjjdddddd} | jtd| d< | jdd	d
gdd}| }|j	j
d  | j kst|j	j
d  | j kst|j	j
d  | j kstd S )Nr      i0u  r{   '  )r   int_idother_idrv   categoryr   r   r   Tr8   rE   rF   )r   r   r   r   r   r   strr;   r%   r!   r   r.   r>   r   r   )r?   r   r   r   r   r   test_observed_perf  s    r   c                 C   s   t dddgdddgd}t|dddgd}|jd	| d
}|j}| rftddgddtdgddd}n*tddgddtg ddtdgddd}t|| d S )Nr   rC   rB   r   rE   rF   rG   r   valsr   r8   r   int64dtype)r   rC   r   rB   rC   )r   r   r;   r   r   r   assert_dict_equal)r9   r   r?   r   r   r   r   r   r   test_observed_groups  s    "
r   c                 C   s   t tdtjdgdddgddddgd}|jd	| d
}|j}| rXdtddgddi}n(tddgddtg ddtg ddd}t|| d S )Nr   rB   rD   r   rE   rF   rG   r   r   r8   r   r   r   )r   rB   rD   )	r   r   r   r   r;   r   r   r   r   )r9   r?   r   r   r   r   r   r   test_observed_groups_with_nan  s    

r   c                  C   s   t dtjtjgdddgd} tdddg}t| |d}|jd	d
dd d}t dddgdddgd}tdtjtjg|dd}d	|j_t	
|| d S )Nr   rB   rC   r   rE   rF   rG   )r   serr   Fr8   r   r   r   )r   r   r   r   r   r;   r-   r!   rL   r   r   )r   r   r?   r   r!   r   r   r   r   test_observed_nth   s    r   c                 C   s   t tjdtjdgdddgd}tddddg}t||d	}|jd
| d  }| rxtt dgdddgddgd	}n,tt dddgdddgddtjtjgd	}t	|| d S )Nr   rB   rC   r   rE   rF   rG   r4   )s1s2r   r8   )
r   r   r   r   r   r;   r'   r   r   r   )r9   r   r   r?   r   r   r   r   r   #test_dataframe_categorical_with_nan  s    r   r9   r}   c           	      C   s   t ddddddgddddg| d}tddddddg}t||d}|jd||dd	 d
}t|jjdd}t|j}|sd|| < t||ksd|  d| d| d| }dst	|d S )NrD   r   rB   r   r   )labelr   r   )r9   r}   r   r'   rc   r   zDLabels and aggregation results not consistently sorted
for (ordered=z, observed=z, sort=z
)
Result:
F)
r   r   r   r;   	aggregater!   arrayisnar#   r>   )	r   r9   r}   r   r   r?   r   Zaggrr   r   r   r   0test_dataframe_categorical_ordered_observed_sort#  s     	

r   c               	   C   s  t jddd} tjjdddd}tj|| dd}ttjdd}|j	|d	d

 }|j	t|d	d

 }|| }t|j|jdd|_t|| |j	|d	d
}| }|j }||}	||}
|
j	|	d	d
 }t|| t|j|j t|jd|jd tjtdd| dd}t|}t| jd| tddddddddgd }t| jd| d S )Nz
2014-01-01r4   )periodsr   rz   r{   Tru   Fr8   r   r~   r%   r+   r1   r,   r   r   r   r*   rE   )r   
date_ranger   r   r   r   r   r   r   r;   r+   r   r   r   r!   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r@   r   r   r   r   r   r   r   r   r   r   r   r   r   test_datetimeF  s<    
  



 
r   c                  C   s(  t jd} ddddg}| jdddd	}tj||d
d}tt t dd	ddt
dd}||d< |djddd }|t
d j|jdd }ttjddddg|d
ddd|_t|| |jddd }|t
d j|jdd }ttjddddg|d
ddd|_t|| d S )Ni90  rv   rw   rx   ry   r   r4   rg   r{   Tru   rJ   rY   r@   Fr   r8   rE   rF   rG   rb   )r   r   ZRandomStater   r   r   r   r   r   Zreshaper   r   r;   r2   r   r   r!   r   r   )sr   r   r@   r?   r   r   r   r   r   test_categorical_indexm  s(    &  r  c                  C   sz   t ddddgddddgdd} ttjdd| d	}|d
dddgd  }t|	 j
|  t|	 j
j| j d S )Nry   rv   rx   rw   Tr   rg   r4   rY   rE   rF   rG   rH   )r   r   r   r   r   r;   r   r   r   r   rZ   assert_categorical_equalrS   )r@   r?   r   r   r   r   !test_describe_categorical_columns  s    

r  c                  C   s   t tdddgd tdd d} | d d	| d< | jd
dgddd   }| }tddgddd}t	
|j| t	|jj|j |d |d  }tddgtddgd
dd}t	|| d S )Nrf   rQ   rR   rH   ZXYXXYrF   )r   mediumartistr  r   r  Fr8   r   r   rL   r   r4   XYrb   rM   )r   r   r   r   r;   r%   unstackr   r   r   r   rZ   r  rS   r   r   r   )r?   Zgcatr   Zexp_columnsr   r   r   r   test_unstack_categorical  s    r  c                  C   s^   t tjtjddddddddg
} t|  jd}tjt	dd | 
|  W 5 Q R X d S )NrE   rF   rG   r4   z$Grouper and axis must be same lengthrV   )r   r   r   r   r   dropnarS   pytestraises
ValueErrorr;   r+   )seriesrj   r   r   r   test_bins_unequal_len  s     r  r  r   r4   rG   rE   rF   rQ   rR   c                 C   s   | d S r   r   )r   r   r   r   r^     r_   r^   rT   c                 C   sD   |  ttddd}|t}t|t| d}t|| d S )NZABBAr   r   rM   )r;   r   r   r   r   keysr   r   )r  r   r;   r   r   r   r   r   test_categorical_series  s    
r  c                     s  t tdddgdddgdddgdddgd	  jd
dgddd } t tddg jjjdddgddgd	d
ddgd}t| |  fdd} jd
|gddd } t tddg jjjdddgddgd	d
ddgd}t| | tdddgd
d} jd
|gddd } t| | d
dg}t tddg jjjdddgddgd	d
ddgd}dD ]8}t	t
d|d _ j|ddd } t| | q`d S )NrE   rF   rG   rf      e   f   g   )r   rQ   rR   r   rQ   FTr   r      rR   rY   c                    s    j | df S )NrQ   )loc)rr?   r   r   r^     r_   ztest_as_index.<locals>.<lambda>r   r   rB   rb   )Nr  rR   r   )r   r   r;   r2   r   r   r   r   r   r   r   r!   )r   r   r   r  Zgroup_columnsrL   r   r  r   test_as_index  sP    	r  c                  C   s   t d} tdtt d| ddi}t| | ddd}t|jdddd j| t|jdddd j| tdtt d| ddi}t| | ddd}tt d	t d	ddd}t|jdddd j| t|jdddd j| d S )
Nr   rQ   baTr   r  Fr|   bac)	r   r   r   r   r   r   r;   r'   r!   )r   r?   r!   r   Znosort_indexr   r   r   test_preserve_categories   s,        r   c               
   C   s  t dddddgdddddgttdtd	d
dttdtd	ddd} t ddtjgddtjgttd	td	d
dttd	td	ddd}dD ]t}d}tjt|d2 | j|d
d
d	 }| j|dd
d	 
 }W 5 Q R X |j|jd}t|| t|| qd S )NrE   rF   rf      r      r   Zabaabr  Fr   T)rQ   rR   r   r   r   g      ?g      9@r   )r   r   rU   rV   )byr   r9   rY   )r   r   r   r   r   r   r   r   r;   r+   r   r   rZ   r   )r?   Zexp_fullcolr   Zresult1r   r   r   r   r   test_preserve_categorical_dtype  s.    	

r%  zfunc, valuesr'   secondr(   fourththirdr,   r*   c                 C   s   t ddddgdd}tddddg|d	}|d
}t||  }tddgt||jdd	d
}t|| |d
d }t||  }|d }t	|| d S )Nr'   r&  r(  r'  Tru   r   )payloadr$  r*  r   r$  )
r   r   r;   getattrr   r   r   r   r   r   )funcrS   rC   r?   r   r   r   Zsgbr   r   r   test_preserve_on_ordered_ops9  s    
r-  c                  C   s  t tjd} tdddddddddg	}tj|dddgdd}| j|dd }| j|dd }t	|j
|j|jd	|_
t|| tddddddd
d
d
g	}tj|dddd
gdd}| j|dd }| j|dd |j}t	|j
|j|jd	|_
t|| tdddddddddg	ddddgdd	}tddddddd
ddg	|d} | jddd }|d j}tdddtjg}t|| d S )N	   r   rE   rF   Tru   Fr8   r   rG   r   rB   rC   rD   r4   rH   rI   )r   r   r   r   r   r   r   r;   r+   r   r!   r   r   r   r   r   r   rS   r   assert_numpy_array_equal)r   r   r@   r   r   r   r   r   test_categorical_no_compressU  s>        
 
r0  c                  C   sd   t d gd tdddgd} | d d }ttg ddgdtg ddd	dd
}t|| d S )NrG   Ztraintestr  rQ   rR   r   rc   r   rL   r   )r   r   r;   r'   r   r   r   r?   r   r   r   r   r    test_groupby_empty_with_categoryz  s    r4  c                  C   s   t dtjdddi} dd tdddD }t||}| jdgdd	} tj| j	tdd
dd|d| d< | j
dgddd  }|t|jdd d }t|j|jjd|_t|| d S )Nvaluer   r   rz   c                 S   s   g | ]}| d |d  qS )z - i  r   ).0r   r   r   r   
<listcomp>  s     ztest_sort.<locals>.<listcomp>i  T)r#  Z	ascendingi)  F)rightrt   Zvalue_groupr8   c                 S   s   t |  d S )Nr   )floatsplitr\   r   r   r   r^     r_   ztest_sort.<locals>.<lambda>)r   rb   )r   r   r   r   r   r   Zsort_valuesr   r   r5  r;   r%   sortedr!   r   rL   r   r   )r?   rt   Z
cat_labelsresr   r   r   r   	test_sort  s    
 
  
r=  c               
   C   s  t dddgdddgdddgdd	d
gdddgdddgdddggdddgd} t| d dd| d< tddddgddd}t ddgddgd	d
gddggddg|d}d}| j|ddd }t|| |}| j|ddd }t|| t| d dd| d< tddddgdd}t ddgddgd	d
gddggddg|d}tddddgddddgdd}t ddgddgd	d
gddgg|ddgd}d}| j|ddd }t|| | j|ddd }t|| d S )Nz	(7.5, 10]rf   r~   rg   z(2.5, 5]rH   rh   z(5, 7.5]r   ri   r4   r   z(0, 2.5]rE   r   rT   r   r   rv   rw   rY   Tru   rK   )rZ   r!   Fr|   rb   r   rL   r   )r   r   r   r;   r'   r   r   )r?   r!   Zexpected_sortr$  result_sortZexpected_nosortresult_nosortr   r   r   
test_sort2  sj    	
    
   

  rA  c                  C   s  t tdddtdddtdddtdddtdddtdddtdddgddddd	ddgdd
dddddgddddgd} t| d dd| d< tdddtdddtdddtdddg}t ddgddgddgddggddgd}t|ddd|_tdddtdddtdddtdddg}t ddgddgddgddggddgd}t||ddd|_d}t|| j|ddd  t|| j|ddd  t| d dd| d< tdddtdddtdddtdddg}t ddgddgddgddggddgd}t|dd|_tdddtdddtdddtdddg}t ddgddgddgddggddgd}t||dd|_d}t|| j|ddd  t|| j|ddd  d S )Ni  rT   rE   rF   rH   rf   r~   r   r4   rg   rh   ri   r   r   r   )dtrv   rw   rB  rv   rw   rY   Tru   rK   )r   rL   r   Fr|   rb   r>  )	r   r   r   r   r!   r   r   r;   r'   )r?   r!   r?  r@  r$  r   r   r   test_sort_datetimelike  s    






	



 



      



 



   rC  c                  C   s   t tdddgdddgddddgd} tdddgdd	}| jdd
dj }tdddg|dd	}t|| | jdd
djjdd}tdddg|dd	}t|| | jdd
djjdd}tddt	j
g|dd	}t|| | jdd
djjdd}tdt	j
t	j
g|dd	}t|| d S )Nr   rB   rC   r   rE   rF   r  rQ   rb   Fr8   rG   r   rR   Z	min_count)r   r   r   r;   rR   r2   r   r   r   r   r   r?   Zexpected_idxr   r   r   r   r   test_empty_sum4  s     "rF  c                  C   s   t tdddgdddgddddgd} tdddgdd	}| jdd
dj }tdddg|dd	}t|| | jdd
djjdd}tdddg|dd	}t|| | jdd
djjdd}tddt	j
g|dd	}t|| d S )Nr   rB   rC   r   rE   rF   r  rQ   rb   Fr8   rR   r   rD  )r   r   r   r;   rR   r/   r   r   r   r   r   rE  r   r   r   test_empty_prodP  s    "rG  c                  C   s   t ttdtttjddddd tdd} | dd	g }t	j
td
ddgttjddddgdd	gd}t ddddddddtjdg	i|d}t|| d S )NZ	abcbabcbaz2018-06-01 00Z1TrG   )freqr   r.  )key1key2rS   rI  rJ  r   rB   rC   r   rS   r   r4   r~   rH   r   rF   rM   )r   r   r   r   r   r   r   r;   r+   r   r   r   r   r   )r?   r   r   r   r   r   r   ,test_groupby_multiindex_categorical_datetimeh  s"    
	$rK  zas_index, expectedr   r   r   rB   r   r]   )r!   r   rL   r   rB   r]   c                 C   sV   t tdddgdddddgdddgd}|jddg| d	d
d  }t|| d S )NrE   rF   r   r   rG   rL  r   rB   Tr   r]   )r   r   r;   r2   r   assert_equal)r   r   r?   r   r   r   r   ,test_groupby_agg_observed_true_single_column  s
    $rN  r   c                 C   sZ   t ddddgddddgdd}t d dddgddddgdd}|jd| d}t|| d S )	Nr   rB   rC   rD   Fr   rE   r   )r   shiftr   rM  )r   ctr   r<  r   r   r   
test_shift  s    
 
 
 
 rQ  c                 C   s\   |   dd }|d d|d< |d d|d< tddddg|d	< |jd
gdd}|S )a  
    DataFrame with multiple categorical columns and a column of integers.
    Shortened so as not to contain all possible combinations of categories.
    Useful for testing `observed` kwarg functionality on GroupBy objects.

    Parameters
    ----------
    df: DataFrame
        Non-categorical, longer DataFrame from another fixture, used to derive
        this one

    Returns
    -------
    df_cat: DataFrame
    Nr4   rQ   r   rR   rE   rF   rG   r:   r<   rp   )r   r   r   Zdrop)r?   df_catr   r   r   rR    s    rR  	operationr   r=   c                 C   s   t ddddg| d jdd}t ddddg| d jdd}t||g}td	d
ddg|dd}| jddgddd }t||t}t	|| d S )Nrv   rw   rQ   r2  onetwothreerR   rE   rG   rF   r4   r:   r   r!   rL   Tr8   )
r   r   r   r   r   r;   r+  r2   r   r   )rR  rS  Zlev_aZlev_br!   r   r   r   r   r   r    test_seriesgroupby_observed_true  s    rX  c                 C   s   t jtddgddtdddgddgdd	gd
 \}}tddtjdtjdg|dd}|dkrl|jddd}| jdd	g|dd }t	||t
}t|| d S )Nrw   rv   Fru   rT  rV  rU  rQ   rR   r   rF   r4   rE   rG   r:   rW  r   r   Zinfer)Zdowncastr8   )r   r   r   Z	sortlevelr   r   r   Zfillnar;   r+  r2   r   r   )rR  r9   rS  r!   _r   r   r   r   r   r   )test_seriesgroupby_observed_false_or_none  s    rZ  zobserved, index, datarv   rw   rQ   r2  rT  rU  rV  rR   ru   c                 C   s>   t ||dd}| jddg|dd dd }t|| d S )Nr:   rW  rQ   rR   r8   c                 S   s   |   |  dS )Nr,   r*   r[  r\   r   r   r   r^     r_   z8test_seriesgroupby_observed_apply_dict.<locals>.<lambda>)r   r;   r=   r   r   )rR  r9   r!   r   r   r   r   r   r   &test_seriesgroupby_observed_apply_dict  s
    .r\  c                 C   s<   |  ddgd  }|  ddg d }t|| d S )NrQ   rR   r:   )r;   r+   r   r   )rR  r   r   r   r   r   4test_groupby_categorical_series_dataframe_consistent   s    r]  codec                 C   sr   t ddddgddddgd	d
ddgd}tj| tdd}|j|dd }|jj|dd j}t|| d S )NrE   rF   rG   r4   r   r)  rH   r   rT   r~   r   r   r   rp   r   )	r   r   r   r   r;   r+   Tr   r   )r^  r?   r   r   r   r   r   r   test_groupby_categorical_axis_1'  s
    (rb  z(ignore:.*Select only valid:FutureWarningc                 C   s\   t tddg|dddgdddgd	}| }|jd| d
jt jdd }t|| d S )NZBobZGregru   rE   rF   )NameItemrc  rd  rY   r8   T)Zskipna)	r   r   r   r;   r   r2   r   r   r   )r9   r   r?   r   r   r   r   r   $test_groupby_cat_preserves_structure1  s     re  c               	   C   sL   t ddddgtdd} tjtdd | ddd	  W 5 Q R X d S )
Nr   rB   r4   r3   r   z'vau'rV   r3   c                 S   s&   t | jd d g| jd d gdS )Nr   r3   Zvaurf  )r   ra   )Zrowsr   r   r   r^   H  s    z/test_get_nonexistent_category.<locals>.<lambda>)r   r   r  r  KeyErrorr;   r=   r  r   r   r   test_get_nonexistent_categoryC  s
    
rh  c              	   C   s   | dkrt d | dkr4t jjdd}|j| | dkr@tnd }ttt	dt	dd	tt	d
d t	dd	dgd d}t
| |}|rdnd}|jddg|dd }t|| }	tj|dd |	| }
W 5 Q R X t|
|kstd S )Nngroupngroup is not truly a reductionr&   6TODO: implemented SeriesGroupBy.corrwith. See GH 32293reasonr)   AABBABCDr   r   rF   皙?r4   cat_1cat_2r5  r!  rr  rs  r8   r5  The 'mad' method is deprecatedrV   )r  skipmarkxfailnode
add_markerr   r   r   r   r
   r;   r+  r   r   r   r>   )reduction_funcr9   requestrv  warnr?   r    Zexpected_lengthseries_groupbyr   r   r   r   r   0test_series_groupby_on_2_categoricals_unobservedN  s*    


r~  c              	   C   sX  | dkrt d | dkr4t jjdd}|j| | dkr@tnd }ttt	dt	dd	tt	d
d t	dd	dgd d}t
dt
dt
dt
dt
dg}t| |}|jddgddd }t|| }tj|dd || }	W 5 Q R X t|  }
|D ]0}|	j| }t|
rt|s||
kstq|
dkrT| dkrTt|	jtjsTtd S )Nri  rj  r&   rk  rl  r)   rn  r   r   r   rF   rp  r4   rq  ZACZBCCAZCBCCrr  rs  Fr8   r5  rt  rV   r   r2   )r  ru  rv  rw  rx  ry  r   r   r   r   tupler
   r;   r+  r   r   -_results_for_groupbys_with_missing_categoriesr  r   r   r>   r   Z
issubdtyper   integer)rz  r{  rv  r|  r?   Z
unobservedr    r}  r   r   Zzero_or_nanr   r   r   r   r   ?test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nansm  s4    
"


$r  c              	   C   s   | dkrt d | dkrtnd }tttdtddttdtddd	d	d	d	gd
}ddddg}|jddgdd}t| |}tj	|dd t
|| | }W 5 Q R X |D ]}||jkstqd S )Nri  2ngroup does not return the Categories on the indexr)   rn  r   r   111112rp  rq  rQ   2rR   r  r:   1r:   r  rr  rs  Tr8   rt  rV   )r  ru  r   r   r   r   r;   r
   r   r   r+  r!   r>   )rz  r|  r?   unobserved_catsdf_grpr    r<  r   r   r   r   >test_dataframe_groupby_on_2_categoricals_when_observed_is_true  s     


r  c           	   	   C   s   | dkrt d | dkrtnd }tttdtddttdtddd	d	d	d	gd
}ddddg}|jddg|d}t| |}tj	|dd t
|| | }W 5 Q R X t|  }|tjkr|j|    stn|j| |k  std S )Nri  r  r)   rn  r   r   r  r  rp  rq  r  r  r  r  rr  rs  r8   rt  rV   )r  ru  r   r   r   r   r;   r
   r   r   r+  r  r   r   r  Zisnullr#   r>   )	rz  r9   r|  r?   r  r  r    r<  r   r   r   r   ?test_dataframe_groupby_on_2_categoricals_when_observed_is_false  s$    	



r  c                  C   s   ddddgddddgdd	dd	gd
} t | }t|d tddd}||d< |jddgddd}|d d}|dd }t|| d S )Nrf   r~   r4   rE   rg   rh   ri   rD   rC   r   rv   r   rH   r   rx   T)r   r}   r+   )	r   r   r   r   r   r;   r   r   r   )rD   r?   r   r   r   r   r   r   r   3test_series_groupby_categorical_aggregation_getitem  s    $r  zfunc, expected_valuesc              	   C   sv   t dddddgdddddgtdddddgdd}|d| }t d	|itdddgdd
d}t|| d S )Nr   rE   rF   rG   r4   )idr   r5  r  r   r5  rb   rM   )r   r   r   r;   r   r   r   r   )r,  Zexpected_valuesr?   r   r   r   r   r   $test_groupby_agg_categorical_columns  s     r  c                  C   s   t dtdddgdddgdi} t dddgiddgd}| dddgtj}t|| | dddg }t|| d S )	NrQ   r   rB   rC   r   rF   rE   rM   )r   r   r;   r   r   r.   r   r   r?   r   r   r   r   r   test_groupby_agg_non_numeric  s     r  r,  c                 C   sl   t dgtdgddj d}|dd }t||  }tdgtdgddd|d jd	}t	|| d S )
Ni  rB   r   r   r  rQ   rR   rb   r!   rL   r   )
r   r   r   Z
as_orderedr;   r+  r   r   r   r   )r,  r?   Z
df_groupedr   r   r   r   r   <test_groupby_first_returned_categorical_instead_of_dataframe  s        r  c                  C   s   t ddg} d| j_tddddgtddddgt| dd}td	d
dgitddgddd}|jddd	 }t
|| d S )NrE   rF   FrG   rH   rT   r   rI   r   r   g      @rB   rb   r   r}   )r   r   flagsZ	writeabler   r   r   r   r;   r+   r   r   )r@   r?   r   r   r   r   r   test_read_only_category_no_sort  s    $
 r  c               
   C   s   t ddddddddgddddddddgd} | d djjd	dddgd
d| d< t ddddddddddddd}|jddd}td	dddgd	dddgd
ddd|_| ddg 	 }t
|| d S )Nsmalllarger  r:   rQ   )rv   rw   rv   r   tinyTru   r   )rQ   r:   rE   rG   rF   )r  r  r  r  rw   r!   rp   )r   r   rL   r   )r   r   r   Zset_categoriesZrename_axisr   rZ   r;   r0   r
  r   r   r  r   r   r   #test_sorted_missing_category_values#  sJ    

 

r  c                  C   s   t dddddgi} | d d| d< | dj }tdddgtdddgddd| d jd}t	|| | d
dd	i}| }t|| d S )
NZcol_numrE   rF   rG   r   col_catrb   r  r'   )r   r   r;   r  r'   r   r   r   r   r   r   to_framer   r3  r   r   r   1test_agg_cython_category_not_implemented_fallbackP  s    r  c               	   C   s   t ddddgddddgddtjdgddddgddddgd	} | d
di} | ddgdd }tjddgddggdd}t ddgddgddgd|d}t	|| d S )NrE   rF   rp  g?g333333?rv   rw   Zfee)rQ   rR   numerical_col
object_colcategorical_colr  r   rQ   rR   c                 S   s   |    S r[   )r   r2   r  r   r   r   r^   t  r_   z7test_aggregate_categorical_with_isnan.<locals>.<lambda>r  r   r   )r  r  r  r   )
r   r   r   r   r;   r   r   r   r   r   r   r   r   r   %test_aggregate_categorical_with_isnanf  s&    




r  c               	   C   s   t ddddddgddddddgd} tjdddgdd	}| d
 || d
< | dd
 t| d< |  }t ddddddgddddddgddddddgd}|d
 ||d
< |d ||d< t	|| d S )NrE   rF   rG   ZWaitingZOnTheWayZ	Delivered)
package_idstatusTr   r  r  last_status)r  r  r  )
r   r   CategoricalDtyper   r;   r   r*   r   r   r   )r?   Zdelivery_status_typer   r   r   r   r   test_categorical_transform  sN     	r  )r,  r9   c                 C   s   t ddddg}ddddg}t|||d}t ddg}tj||gddgd}tdtjtjdg|ddtdtjtjdg|ddd	}||  }|r| tj	}|j
ddg|d
d }	t|	|  }
t|
| d S Nr   rE   r   r   rB   r   rC   rb   )r'   r(   r8   )r   r   r   r   r   r   NaNr  r   r   r;   r+  r   r   )r,  r9   r   r   r?   r   r   expected_dictr   Zsrs_grpr   r   r   r   Ftest_series_groupby_first_on_categorical_col_grouped_on_2_categoricals  s    r  c                 C   s   t ddddg}ddddg}t|||d}t ddg}tj||gddgd}tdtjtjdg|ddtdtjtjdg|ddd	}||   }|r| 	tj
}|jddg|d
}	t|	|  }
t|
| d S r  )r   r   r   r   r   r   r  r  r  r   r   r;   r+  r   r   )r,  r9   r   r   r?   r   r   r  r   r  r   r   r   r   Btest_df_groupby_first_on_categorical_col_grouped_on_2_categoricals  s    r  c                  C   s   t tdddgdddgdtdd} | jddd	}|j}tjd
dgddtjdgddtjg ddd}| | ks~t| D ]}t	
|| ||  qd S )NrB   r   rC   r   rG   )r   r$  r   Fr  r   rE   Zintpr   rF   )rB   r   rC   )r   r   r   r;   indicesr   r   r  r>   r   r/  )r?   r   r   r   r   r   r   r   2test_groupby_categorical_indices_unused_categories  s    r  c                 C   sp   t ddddgi}|d d|d< t|dd |  }ttdddgdtdddgddd}t|| d S )	Nr   rE   rF   rG   r   rB   rb   )rL   r!   )	r   r   r+  r;   r   r   r   r   r   )r,  r?   r   r   r   r   r   1test_groupby_last_first_preserve_categoricaldtype  s      r  c               	   C   s   t ddgddgddgd} | jdddd} | jd	d
gdd d }tddgttddgd	dtddgd
dgdd}t	|| d S )NrE   rF   rf   r  r   r   rI   r   r   rB   Tr8   rC   rb   r   )
r   r   r;   r.   r   r   r   r   r   r   r3  r   r   r   )test_groupby_categorical_observed_nunique  s    r  c                  C   s   t jddgdd} tddgddgddggddgd	d| i}|dd  }tddgtddgdd
dt jddgddd}t	|| d S )Nr  bigTr   rE   rF   grpdescriptionrY   rb   r  )
r   r  r   r   r;   r*   r   r   r   r   )r   r?   r   r   r   r   r   ,test_groupby_categorical_aggregate_functions  s     r  c                 C   s   t ddgdddgd}tt ddgdddgdddgd}|jd| |d}| }| rltd	ddgi|d
}n,tdddgdddg}td	dddgi|d
}d|j_t|| d S )NrE   rF   rG   r   r4   )r]   rO   r]   )r9   r  rO   rM   r   )	r   r   r;   r2   r   r!   rL   r   r   )r9   r  r   r?   r   r   r   r!   r   r   r   test_groupby_categorical_dropna)  s    $r  )`r   Znumpyr   r  Zpandasr   r   r   r   r   r   r   r	   Zpandas._testingZ_testingr   Zpandas.tests.groupbyr
   r  r"   r  rA   r   r   r   rv  Zparametrizer   filterwarningsr   r   r   r   r   r   r   r   r   r  r  r  r  r   renamer  r  r   r%  r-  r0  r4  r=  rA  rC  rF  rG  rK  r   rN  r   ZNaTrQ  ZfixturerR  rX  rZ  r   r\  r]  rb  re  rh  r~  r  r  r  r  r.   r%   r  r  r  r  r  r  r  r  r   boolr  r  r  r  r  r  r  r   r   r   r   <module>   s  $	! 
%

l '
$

8



	
%:[ 




"
",
	
	

-
#

-5  
