U
    _{flb                     @   s   d dl Z d dlZd dlZd dlZd dlmZ d dlZd dlm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZmZ G dd dZdS )    N)is_integer_dtype)CategoricalCategoricalIndex	DataFrame
RangeIndexSeriesget_dummies)SparseArraySparseDtypec                
   @   s(  e Zd Zejdd Zejddejedgddd Z	ejd	d
gddd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zejd3d4e d5d6giie d7d8gifd4e d9d5giie d:d8gife d9d6gid5d;e d7d8gife d9d6gid5d<e d=d8gifgd>d? Z!d@dA Z"dBdC Z#dDdE Z$dFdG Z%dHdI Z&dJdK Z'dLdM Z(dNdO Z)ejdPd8dQgdRdS Z*ejd
d8dQgdTdU Z+dVdW Z,dXdY Z-ejdZd[gd\d] Z.d^d_ Z/d`da Z0dS )bTestGetDummiesc                 C   s"   t dddgdddgdddgdS )Nabc         )ABC)r   )self r   d/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/pandas/tests/reshape/test_get_dummies.pydf   s    zTestGetDummies.dfuint8i8N)paramsc                 C   s   t |jS N)npdtypeparamr   requestr   r   r   r      s    zTestGetDummies.dtypeZdensesparsec                 C   s
   |j dkS )Nr"   )r   r    r   r   r   r"   "   s    zTestGetDummies.sparsec                 C   s   |d krt jS |S r   )r   r   )r   r   r   r   r   effective_dtype(   s    zTestGetDummies.effective_dtypec              	   C   s.   d}t jt|d t|dd W 5 Q R X d S )Nz1dtype=object is not a valid dtype for get_dummiesmatchobjectr   )pytestraises
ValueErrorr   )r   r   msgr   r   r   'test_get_dummies_raises_on_dtype_object-   s    z6TestGetDummies.test_get_dummies_raises_on_dtype_objectc                 C   s   t d}t|}t|t d}tdddgdddgdddgd| |d}|r\|jtdd}t|||d	}t|| t|||d	}t|| t d|_	t|||d	}t|| d S )
NabcABCr   r   r   r   r   r'           
fill_valuer"   r   )
listr   r   r#   applyr	   r   tmassert_frame_equalindex)r   r"   r   s_lists_seriess_series_indexexpectedresultr   r   r   test_get_dummies_basic2   s     
z%TestGetDummies.test_get_dummies_basicc                 C   s  t d}t|}tdddddgdddddgdddddgd	}tdddgdddgdddgd	| |t dd
}|rt|rd}n|tkrd}nd}|jt|d}t|||d}t	
|| t|||d}t	
|| t||j||d}|r
d| |j d| d}	n| |j}	t|	didd}|j }dd |jD |_t	|| t|dg||d}ddd}
d|
|	d |
|	< t|
dd }|j }dd |jD |_| }t	|| d S )Nr-   r   r   r   r   r   r   r   r/   )r   columnsFr0   r1   r3   )r?   r"   r   zSparse[z, ]   countnamec                 S   s   g | ]}t |qS r   str.0ir   r   r   
<listcomp>k   s     z?TestGetDummies.test_get_dummies_basic_types.<locals>.<listcomp>r   )int64r&   c                 S   s   g | ]}t |qS r   rE   rG   r   r   r   rJ   u   s     )r4   r   r   r#   r   boolr5   r	   r   r6   r7   r?   rD   ZdtypesZvalue_countsr8   Zassert_series_equalget
sort_index)r   r"   r   r9   r:   Zs_dfr<   r2   r=   Z
dtype_nameZexpected_countsr   r   r   test_get_dummies_basic_typesG   sJ    (


z+TestGetDummies.test_get_dummies_basic_typesc                 C   s   t jg}t|}t|dgd}t||d}t||d}t||d}|jsLt|jsVt|js`t|j dgkstt|j dgkst|j dgkstd S )Nr   r8   r"   r   )r   nanr   r   emptyAssertionErrorr8   tolist)r   r"   Zjust_na_listZjust_na_seriesZjust_na_series_indexZres_listZ
res_seriesZres_series_indexr   r   r   test_get_dummies_just_nay   s    


z'TestGetDummies.test_get_dummies_just_nac           
   	   C   s$  ddt jg}t|||d}tdddgdddgd| |d}|rP|jtdd	}t|| t|d
||d}tt jdddgddddgddddgi| |d}|j	ddt jgdd}|j
|_
|r|jtdd	}t|| tt jgd
||d}ttddgdt jg| |d}	t|j|	j d S )Nr   r   r3   r   r   )r   r   r'   r0   r1   Tdummy_nar"   r   ZaxisrP   r?   r   )r   rR   r   r   r#   r5   r	   r6   r7   reindexr?   r   Zassert_numpy_array_equalvalues)
r   r"   r   sresexpres_naexp_nares_just_naexp_just_nar   r   r   test_get_dummies_include_na   s4     "  z*TestGetDummies.test_get_dummies_include_nac                 C   sj   d}t d}|||g}t|d|d}tddddgd| dddgi}|rZ|jtd	d
}t|| d S )NezLATIN SMALL LETTER E WITH ACUTEletterprefixr"   Zletter_eTFZletter_r   r1   )unicodedatalookupr   r   r5   r	   r6   r7   )r   r"   re   eacuter]   r^   r_   r   r   r   test_get_dummies_unicode   s    

z'TestGetDummies.test_get_dummies_unicodec                 C   s   |ddg }t ||d}tdddgdddgdddgdddgdtd}|rttdddgddtdddgddtdddgddtdddgddd}t|| d S )	Nr   r   rQ   r   r   A_aA_bB_bB_cr'   rL   )r   r   rL   r	   r6   r7   r   r   r"   r=   r<   r   r   r   test_dataframe_dummies_all_obj   s    $	z-TestGetDummies.test_dataframe_dummies_all_objc                 C   sb   |ddg }| ddd}t|}tdddgdddgdddgdddgdtd	}t|| d S )
Nr   r   r&   stringr   r   r   r   rm   r'   )astyper   r   rL   r6   r7   r   r   r=   r<   r   r   r   #test_dataframe_dummies_string_dtype   s    	z2TestGetDummies.test_dataframe_dummies_string_dtypec              	   C   s   t |||d}|r"t}t|d}n
tj}|}tdddg|dddg|d|dddg|d|dddg|d|dddg|dd}|dd	d
ddg }t|| d S )Nr3   r   r   r   r   r'   )r   rn   ro   rp   rq   r   rn   ro   rp   rq   )r   r	   r
   r   arrayr   r6   r7   r   r   r"   r   r=   Zarrtypr<   r   r   r   "test_dataframe_dummies_mix_default   s     	z1TestGetDummies.test_dataframe_dummies_mix_defaultc                    s   ddg}t |||d}tdddgdddgdddgdddgdddgd	}|d
g |d
g< ddddg}|d
g|  }|rztnt ||  fdd||< t|| d S )Nfrom_Afrom_Brg   r   r   r   TFr   from_A_afrom_A_bfrom_B_bfrom_B_cr   r   r   r   r   c                    s    | S r   r   )xr{   r   r   <lambda>       zCTestGetDummies.test_dataframe_dummies_prefix_list.<locals>.<lambda>)r   r   r	   r   r5   r6   r7   )r   r   r"   prefixesr=   r<   colsr   r   r   "test_dataframe_dummies_prefix_list   s     	z1TestGetDummies.test_dataframe_dummies_prefix_listc              
   C   s   t |d|d}ddddg}tdddddgd	ddddgd
ddddggdg| d}|dtji}|rtjtdd	d
gddtdddgdddtdddgdddtdddgdddtdddgdddgdd}t	|| d S )Nbadrg   Zbad_aZbad_bZbad_cr   TFr   r   r   r?   rC   zSparse[bool])rD   r   rY   )
r   r   rv   r   rK   pdconcatr   r6   r7   )r   r   r"   r=   Zbad_columnsr<   r   r   r   !test_dataframe_dummies_prefix_str   s*    z0TestGetDummies.test_dataframe_dummies_prefix_strc                 C   s   t |dgdg|d}tdddgdddgdd	dgd	dd	gd
}|j}||dd   t||dd  < |dg |dg< |rddg}|| tdd	||< t|| d S )Nr}   r   )rh   r?   r"   r   r   r   r   r   r   )r   r   r   r   r   r   r   rL   )r   r   r?   rv   rL   r
   r6   r7   r   r   r"   r=   r<   r   r   r   r   test_dataframe_dummies_subset  s    "z,TestGetDummies.test_dataframe_dummies_subsetc                 C   s   t |d|d}tdddgdddgdddgdddgdddgd}|d	g |d	g< |d	d
dddg }|rd
dddg}|| tdd||< t|| t |ddg|d}|jdddd}t|| t |ddd|d}t|| d S )Nz..
prefix_sepr"   r   r   r   TF)r   A..aA..bB..bB..cr   r   r   r   r   rL   r   __ZB__bZB__c)r   r   r   ru   )r   r   rv   r
   r6   r7   renamer   r   r   r   !test_dataframe_dummies_prefix_sep.  s(    	z0TestGetDummies.test_dataframe_dummies_prefix_sepc              	   C   s8   t d}tjt|d t|dg|d W 5 Q R X d S )NzPLength of 'prefix' (1) did not match the length of the columns being encoded (2)r$   ztoo fewrg   reescaper(   r)   r*   r   r   r   r"   r+   r   r   r   (test_dataframe_dummies_prefix_bad_lengthH  s
    z7TestGetDummies.test_dataframe_dummies_prefix_bad_lengthc              	   C   s8   t d}tjt|d t|dg|d W 5 Q R X d S )NzTLength of 'prefix_sep' (1) did not match the length of the columns being encoded (2)r$   r   r   r   r   r   r   r   ,test_dataframe_dummies_prefix_sep_bad_lengthP  s
    z;TestGetDummies.test_dataframe_dummies_prefix_sep_bad_lengthc                 C   s   ddd}t dddgdddgddd	gd
}t|||d}t dddgdddgdddgdddgdddgd}ddddg}|| t||< |r|| tdd||< t|| d S )Nr}   r~   ru   r   r   r   r   r   r   )r   r   r   rg   r   r   r   r   r   r   rL   )r   r   rv   rL   r
   r6   r7   )r   r"   r   r   r=   r<   r?   r   r   r   "test_dataframe_dummies_prefix_dictX  s     
"
z1TestGetDummies.test_dataframe_dummies_prefix_dictc                 C   s  t jt jt jg|jdd d f< t|d||djdd}|rJt}t|d}n
t j}|}tdddt jg|ddddg|d|ddddg|d|ddddg|d|ddddg|d|ddddg|d|ddddg|dd	jdd}t	
|| t|d
||d}|dddddg }t	
|| d S )Nr   TrW   r   rY   r   r   r'   )r   rn   ro   A_nanrp   rq   B_nanFr   rn   ro   rp   rq   )r   rR   locr   rN   r	   r
   ry   r   r6   r7   rz   r   r   r   test_dataframe_dummies_with_nan  s2    
z-TestGetDummies.test_dataframe_dummies_with_nac                 C   s   t dddg|d< t|||djdd}|r<t}t|d}n
tj}|}tddd	g|dddg|d
|dddg|d
|dddg|d
|dddg|d
|dddg|d
|dddg|d
djdd}t	|| d S )Nr   ycatr3   r   rY   r   r   r   r'   )r   rn   ro   rp   rq   Zcat_xcat_y)
r   r   rN   r	   r
   r   ry   r   r6   r7   rz   r   r   r   'test_dataframe_dummies_with_categorical  s(    
z6TestGetDummies.test_dataframe_dummies_with_categoricalzget_dummies_kwargs,expecteddata   är   u   ä_aTr   u   x_ä)r   rh   )r   r   u   xäac                 C   s   t f |}t|| d S r   )r   r6   r7   )r   Zget_dummies_kwargsr<   r=   r   r   r   test_dataframe_dummies_unicode  s    
z-TestGetDummies.test_dataframe_dummies_unicodec                 C   s   t d}t|}t|t d}tdddgdddgdtd}t|d|d}|r\|jtdd	}t|| t|d|d}t|| t d|_	t|d|d}t|| d S )
Nr-   r.   r   r   )r   r   r'   T
drop_firstr"   r1   )
r4   r   r   rL   r   r5   r	   r6   r7   r8   r   r"   r9   r:   r;   r<   r=   r   r   r   !test_get_dummies_basic_drop_first  s    
z0TestGetDummies.test_get_dummies_basic_drop_firstc                 C   s   t d}t|}t|t d}ttdd}t|d|d}t|| t|d|d}t|| tt dd}t|d|d}t|| d S )NZaaar.   r   rP   Tr   )r4   r   r   r   r   r6   r7   r   r   r   r   +test_get_dummies_basic_drop_first_one_level  s    z:TestGetDummies.test_get_dummies_basic_drop_first_one_levelc           	      C   s   ddt jg}t|d|d}tddddgitd}|rB|jtdd}t|| t|dd|d	}tddddgt jdddgitdj	dt jgdd
}|r|jtdd}t|| tt jgdd|d	}tt
dd}t|| d S )Nr   r   Tr   r   r   r'   r1   rX   r   r"   rY   rP   )r   rR   r   r   rL   r5   r	   r6   r7   r[   r   )	r   r"   Zs_NAr^   r_   r`   ra   rb   rc   r   r   r   $test_get_dummies_basic_drop_first_NA  s,    "    z3TestGetDummies.test_get_dummies_basic_drop_first_NAc                 C   sZ   |ddg }t |d|d}tdddgdddgdtd}|rJ|jtdd	}t|| d S )
Nr   r   Tr   r   r   )ro   rq   r'   r1   )r   r   rL   r5   r	   r6   r7   rr   r   r   r   !test_dataframe_dummies_drop_first  s    z0TestGetDummies.test_dataframe_dummies_drop_firstc                 C   s   t dddg|d< t|d|d}tdddgd	dd	gd	d	dgd	ddgd
}dddg}|| t||< |ddddg }|r|D ]}t|| ||< q~t|| d S )Nr   r   r   Tr   r   r   r   r   )r   ro   rq   r   ro   rq   r   r   )r   r   r   rv   rL   r	   r6   r7   )r   r   r"   r   r=   r<   r   colr   r   r   2test_dataframe_dummies_drop_first_with_categorical  s    $
zATestGetDummies.test_dataframe_dummies_drop_first_with_categoricalc              	   C   s   t jt jt jg|jdd d f< t|dd|djdd}tdddt jgddddgddddgddddgddddgd}d	d
ddg}|| t||< |jdd}|r|D ]}t|| ||< qt	
|| t|dd|d}|dd	dg }t	
|| d S )Nr   Tr   r   rY   r   r   )r   ro   r   rq   r   ro   r   rq   r   Fr   )r   rR   r   r   rN   r   rv   rL   r	   r6   r7   )r   r   r"   r=   r<   r   r   r   r   r   )test_dataframe_dummies_drop_first_with_na  s6       



	z8TestGetDummies.test_dataframe_dummies_drop_first_with_nac                 C   s   t dddg}t|}tddgddgddggddgtd}t|| t tdddg}t|}tddgddgddggtddgtd}t|| d S )Nr   r   r   rZ   r   r   )r   r   r   rL   r6   r7   r   )r   r   r=   r<   r   r   r   test_get_dummies_int_int0  s    $ 
 z'TestGetDummies.test_get_dummies_int_intc              	   C   s   t dddgtdddgdddgdddgd}dd	d
dddg}t ddddddgddddddgddddddgg|d}||dd   |||dd  < t|ddg|d}t|| d S )Nr   r   r   r   g      ?g       @)r   r   r   Dr   r   A_1A_2ZB_arp   r   r   r   r   rZ   )r   r   rv   r   r6   r7   )r   r   r   r?   r<   r=   r   r   r   test_get_dummies_int_df=  s    ,"z&TestGetDummies.test_get_dummies_int_dforderedFc                 C   s|   t tdtd|d}t||d}tjdddgdddgg| |d}t|j|j|d}t||| |d}t	
|| d S )NZxyZxyz)
categoriesr   r'   r   r   rZ   )r   r4   r   r   ry   r#   r   r   r   r6   r7   )r   r   r   r   r=   r   r   r<   r   r   r   1test_dataframe_dummies_preserve_categorical_dtypeO  s    $  z@TestGetDummies.test_dataframe_dummies_preserve_categorical_dtypec                 C   sL   t ddgddgd}t|dg|d}|jdgd	}t|dg | d S )
Nr   r   ZABZCD)GDPNationr   r?   r"   r   r   )r   	from_dictr   r[   r6   r7   )r   r"   r   Zdf2r   r   r   *test_get_dummies_dont_sparsify_all_columns]  s    z9TestGetDummies.test_get_dummies_dont_sparsify_all_columnsc                 C   s~   dddg|_ t|jdd}tdddddgdddddgdddddggddd	d	d
gdjdd}|dtji}t|| d S )Nr   r   rY   TFr   r   rn   ro   ZA_cr   )	r?   r   rN   r   rv   r   rK   r6   r7   rw   r   r   r   "test_get_dummies_duplicate_columnsf  s    	z1TestGetDummies.test_get_dummies_duplicate_columnsc                 C   s`   t dddgi}t|dgdd}tdd}t tddg|dtddg|dd	}t|| d S )
Nr   r   r   Tr   rL   r   r'   )r   r   )r   r   r
   r	   r6   r7   )r   r   r=   r   r<   r   r   r   test_get_dummies_all_sparsex  s    
z*TestGetDummies.test_get_dummies_all_sparser\   bazc              
   C   sp   t ddddddgddddddgd	d
dd	d
dgddddddgd}d}tjt|d t||d W 5 Q R X d S )Nr   r   r            onetwor   r   r   r   r   zqwt)barZfoor   Zzooz1Input must be a list-like for parameter `columns`r$   r   )r   r(   r)   	TypeErrorr   )r   r\   r   r+   r   r   r   #test_get_dummies_with_string_values  s    	z2TestGetDummies.test_get_dummies_with_string_valuesc                 C   sT   t td}t||d}tddddgddddgddddgd|d}t|| d S )Nabcar'   r   r   r/   )r   r4   r   r   r6   r7   )r   any_numeric_ea_and_arrow_dtypeZserr=   r<   r   r   r    test_get_dummies_ea_dtype_series  s    "z/TestGetDummies.test_get_dummies_ea_dtype_seriesc                 C   sX   t dtdi}t||d}t ddddgddddgddddgd|d}t|| d S )Nr   r   r'   r   r   )Zx_aZx_bZx_c)r   r4   r   r6   r7   )r   r   r   r=   r<   r   r   r   #test_get_dummies_ea_dtype_dataframe  s    "z2TestGetDummies.test_get_dummies_ea_dtype_dataframe)1__name__
__module____qualname__r(   Zfixturer   r   Zfloat64rL   r   r"   r#   r,   r>   rO   rV   rd   rl   rs   rx   r|   r   r   r   r   r   r   r   r   r   markZparametrizer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r      sx   


2




r   )r   ri   numpyr   r(   Zpandas.core.dtypes.commonr   Zpandasr   r   r   r   r   r   r   Zpandas._testingZ_testingr6   Zpandas.core.arrays.sparser	   r
   r   r   r   r   r   <module>   s    