U
    sVc=                     @   st  d dl Zd dlZd dlmZ d dlmZ d dlZd dl	m
Z ejddddgddggdd	gdd
gddgdfdddgdejgddggddd	gddd
gdddgdfgdd Zejddddgddggdd	gdd
gddgdfdddgdejgddgejdggddd	dgddd
dgddddgdfgdd Zejddddgd	dgd
dgddgdfdddejgd	ddgd
ddgdddgdfgdd Zejdddddejgejddgddgdfddddejgejdddgddejgdfgd d! Zejd"dejd#d$gddgd%d&fdejd#d$d'gddejgd%d&fgd(d) Zejd*d+d,d- Zejd.ed d/ed0ejjd1d2gd3d4gd5gd6d7 Zejddddgddggdd	gdd
gddgdfdddgdejgddggddd	gddd
gdddgdfgd8d9 Zejjejd:ed;ed<fed=ed>fed;ed<fgejd?dd@dgfdd@ddAgfgdBdC Z ejdDej!ddddejgdEdEdFdGgdHdId dJd d giddejgdKdLej!ddddejgdEdEdFdGgdHdId dJd giddMdLej!dddddNgdEdEdFdGgdHdId dJd d giddOdLej!dddddNgdEdEdFdGgdHdId dJd d giddPdLgdQdR Z"ejdSddgddggejdTdgddggejdUddgdVdW Z#dXdY Z$dZd[ Z%ejd\e&d]ejd^dd_d`dadbdcdddedfdgdfdhdidjej!dkejj'edldmdndodpdqgejdrddgdsdt Z(ejdrddgejd^e)dgdudv Z*dS )w    N)pa_version_under1p01)na_value_for_dtypezdropna, tuples, outputsTABg      *@Q^@g     ^@      ?cdeF皙(@      m@g      (@c           	      C   s   dddddgd|dddgdddddgddddd	gg}t j|d
ddddgd}|jd
dg| d }t jj|tdd}| s|jddtj	gdd}t j||d}t
|| d S )Nr   r      r   r   r   {      r   abr	   r
   r   columnsdropnaabnameslevelindexpd	DataFramegroupbysum
MultiIndexfrom_tupleslist
set_levelsnpnantmassert_frame_equal)	r   tuplesoutputsnulls_fixturedf_listdfgroupedmiexpected r3   L/tmp/pip-unpacked-wheel-xj8nt62q/pandas/tests/groupby/test_groupby_dropna.py:test_groupby_dropna_multi_index_dataframe_nan_in_one_group   s    r5   g*@g     @m@c           
   	   C   s   dddddgd|dddgdddddg|dddd	gd|ddd	gg}t j|d
ddddgd}|jd
dg| d }t jj|tdd}| s|ddtj	gddtj	gg}t j||d}	t
||	 d S )Nr   r   r   r   r   r   r   r   r   r   r   r	   r
   r   r   r   r   r   r   r   )
r   r+   r,   r-   nulls_fixture2r.   r/   r0   r1   r2   r3   r3   r4   ;test_groupby_dropna_multi_index_dataframe_nan_in_two_groups7   s    r7   zdropna, idx, outputs)r   r	   r
   c                 C   s~   ddddgd dddgddddgdddd	gg}t j|d
dddgd}|jd
| d }t j|t j|dd
dd}t|| d S )Nr   r   r   r   r   r   r   r   r   r   r   r	   r
   r   r   objectdtypenamer   )r   r    r!   r"   Indexr)   r*   )r   idxr,   r.   r/   r0   r2   r3   r3   r4   *test_groupby_dropna_normal_index_dataframec   s    



r>   zdropna, idx, expectedr   r      r   c                 C   s8   t jddddg|d}|jd| d }t|| d S )Nr      r?   r   r   )r   r   )r   Seriesr!   r"   r)   assert_series_equal)r   r=   r2   serresultr3   r3   r4    test_groupby_dropna_series_level   s    rE   zdropna, expectedg     @j@     u@	Max Speedr   r;         4@c                 C   sL   t jddddgddddgdd}|jd	d
d	tjg| d }t|| d S )Ng     `x@rF   g      >@rI   ZFalconZParrotrG   rH   r   r   r   )r   rA   r!   r'   r(   Zmeanr)   rB   )r   r2   rC   rD   r3   r3   r4   test_groupby_dropna_series_by   s    

rJ   r   )FTc                 C   sB   t dddd gdddd gd}|jd| d}|jj| ks>td S )Nr   r   r@   r?   r   r   r   r   )r   r    r!   Zgrouperr   AssertionError)r   r/   gbr3   r3   r4   test_grouper_dropna_propagation   s     rN   r      Zabcd)r   r@   )Rr   numcolr   c                 C   s   dddd| rt jndgi}tjdddd gdddd gd|d}|jd| d	}|t}tj||d}t|| |dg t}tj||d}t|| |d t}tj	|d |dd
}t
|| d S )Nr   r@   r   r   r?   rK   r   r   r   rH   )r'   r(   r   r    r!   Z	transformlenr)   r*   rA   rB   )r   r   Zexpected_datar/   rM   rD   r2   r3   r3   r4   +test_groupby_dataframe_slice_then_transform   s    
$
rT   c           	      C   s   dddddgdd dddgdddddgddddd	gg}t j|d
ddddgd}ttdd}|jd
dg| d|}t jj|tdd}| s|j	ddt
jgdd}t j||d}t|| d S )Nr   r   r   r   r   r   r   r   r   r   r   r	   r
   r   r   minr   r   r   r   r   r   )r   r    r"   maxr!   aggr#   r$   r%   r&   r'   r(   r)   r*   )	r   r+   r,   r.   r/   Zagg_dictr0   r1   r2   r3   r3   r4   -test_groupby_dropna_multi_index_dataframe_agg   s    rX   zdatetime1, datetime2z
2020-01-01z
2020-02-01z-2 daysz-1 dayszdropna, valuesr      c           
   	   C   s   t ddddddg||||||gd}| r6||g}n||tjg}|jd| d	d
ti}t jd
|it j|ddd}	t	||	 d S )Nr   r@   r?   rO      rY   )valuesdtr\   r   r[   r;   r   )
r   r    r'   r(   r!   rW   r"   r<   r)   r*   )
r   r[   Z	datetime1Z	datetime2Zunique_nulls_fixtureZunique_nulls_fixture2r/   Zindexesr0   r2   r3   r3   r4   &test_groupby_dropna_datetime_like_data   s"    
r^   z#dropna, data, selected_data, levels
         )groupsr[   r[   r   Zdropna_false_has_nan)idZdropna_true_has_nanr	   Zdropna_false_no_nanZdropna_true_no_nanc           
      C   s   t |}|jd| d}|dd }tt|d |d }t jj|dd gd}| sf|rf|j|dd}t j||d}	t	
||	 d S )	Nrb   r   c                 S   s   t dtt| iS )Nr[   )r   r    rangerS   )grpr3   r3   r4   <lambda>F      z@test_groupby_apply_with_dropna_for_multi_index.<locals>.<lambda>r[   r   r   r   )r   r    r!   applytuplezipr#   r$   r&   r)   r*   )
r   dataZselected_datalevelsr/   rM   rD   Z	mi_tuplesr1   r2   r3   r3   r4   .test_groupby_apply_with_dropna_for_multi_index  s    &
rm   input_indexkeysseriesc                 C   s   t dtjgddgddgd}||}|r8|d }n | ddgkrX|dgkrX|dg }| d k	rj|| }|j|dd	}|r|d }| }t|| d S )
Nr   r@   r?   )r   r   r	   r	   r   r   Fr   )	r   r    r'   r(   	set_indexr!   r"   r)   assert_equal)rn   ro   rp   objr2   rM   rD   r3   r3   r4   )test_groupby_dropna_with_multiindex_inputS  s$    



rt   c               	   C   s   dt jddt jgdddddgd} t| }|jd	d
d}|j}t j}dt jddg|ddt jdg|dt jt jddg|di}t|	 |	 D ]\}}t
|| qt t| d stt| dd ddgkstd S )NZg1Zg2r   r   r@   r?   rO   )groupr   ru   Fr   r:   )r'   r(   r   r    r!   indicesZintparrayrj   r[   r)   Zassert_numpy_array_equalisnanr%   ro   rL   )rk   r/   r0   rD   r:   r2   Zresult_valuesZexpected_valuesr3   r3   r4   test_groupby_nan_includedq  s     "
   rz   c                  C   sV   t jtjddggdddgd} | ddg} | jddgdd }| }t|| d S )	Nr   r   r   r   r	   r   Fr   )	r   r    r'   r(   rq   r!   firstr)   r*   )r/   rD   r2   r3   r3   r4   &test_groupby_drop_nan_with_multi_index  s
    r|   sequence_indexQ   r:   ZUInt8ZInt8ZUInt16ZInt16ZUInt32ZInt32ZUInt64ZInt64ZFloat32ZFloat64categorystringstring[pyarrow]zpyarrow is not installedreason)Zmarksdatetime64[ns]	period[d]zSparse[float]test_seriesc                    s  d  fddtdD }|dkrFd|krFd}| jtjj|d |d	kr^d
dtjdn&|dkrvddtjdnddt	j
dttjfdd|D |dddddgd}|jdddd}|r|d }| }i }	t|D ]\}
}|	|d|
 |	|< q|dkr>tjfdd|	D tfdd|D dd}n^t|tr~|dr~tjtjfd d|	D |ddd}ntjfd!d|	D |dd"}tj|	 |dd d#}|s| }t|| d S )$N c                    s(   g | ] }d ddd d|  d  qS )xyz)r   r   r@   r?   r3   .0k)r}   r3   r4   
<listcomp>  s     z(test_no_sort_keep_na.<locals>.<listcomp>rO   r   r   z2dropna=False not correct for categorical, GH#48645r   )r   r   r   r   )r   r   r   )r   r   z
2016-01-01z
2017-01-01r   r@   c                    s   g | ]} | qS r3   r3   r   labeluniquesr3   r4   r     s     rv   r   r?   )keyr   r   Fr   sortr   c                    s   g | ]} | qS r3   r3   )r   r   r   r3   r4   r     s     c                    s$   i | ]}t  | s | d qS )r   )r   Zisnullr   r   r3   r4   
<dictcomp>  s       z(test_no_sort_keep_na.<locals>.<dictcomp>r]   ZSparsec                    s   g | ]} | qS r3   r3   r   r   r3   r4   r     s     c                    s   g | ]} | qS r3   r3   r   r   r3   r4   r     s     r9   )r   r;   r:   )joinrd   nodeZ
add_markerpytestmarkZxfailr   ZNAr'   r(   r    rA   r!   r"   	enumerategetZCategoricalIndexr%   
isinstancestr
startswithr<   rx   r[   Zto_framer)   rr   )requestr}   r:   r   sequencemsgr/   rM   rD   Zsummedr=   r   r   r2   r3   )r}   r   r4   test_no_sort_keep_na  sN    #

 r   c                 C   s   t dddgi}t j||g|d}|r0|d n|}|j|d| d}| }	t t|jg}
t jddgi|
d}|rt	|	|d  nt
|	| d S )	Nr   r   r@   rv   Fr   r?   r   )r   r    rA   r!   r"   r<   r   r:   r)   rB   r*   )r   r:   r-   r6   r   r/   rb   rs   rM   rD   r   r2   r3   r3   r4   test_null_is_null_for_dtype  s    r   )+Znumpyr'   r   Zpandas.compat.pyarrowr   Zpandas.core.dtypes.missingr   Zpandasr   Zpandas._testingZ_testingr)   r   Zparametrizer(   r5   r7   r>   rA   rE   rJ   rN   Z
RangeIndexr%   r#   Zfrom_productrT   rX   Zarm_slowZ	TimestampZ	TimedeltaZPeriodr^   paramrm   rt   rz   r|   rd   Zskipifr   r8   r   r3   r3   r3   r4   <module>   sd  




 

$






"
#

 9