U
    9%evF                     @   s<  d dl Z d dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlmZmZmZmZ ddddgdd	d
dgd dddgddddggZejddd d d d gdddd gddddgddddggdfdd d d d gd d d d gddddgddddggdfdd d d d gddddgddddgddddggdfdd d d d gddddgddddgddddggddddgfdd d d d gddddgddddgddddggddddgfdd d d d gd d d d gddddgddddggd dddgfdd d d d gdddd gddddgddddggdd ddgfdd d d d gd d d d gddddgddddggddddgfgejddd Zdd Zejddgdd Zdd Zejddd d d d gd ddd gddddgddddggdfdd d d d gd d d d gddddgddddggdfdd d d d gd dddgddddgddddggdfdd d d d gd dddgddddgddddggddddgfdd d d d gd d d d gddddgddddggd dddgfdd d d d gd ddd gddddgddddggdd ddgfgejdd d! Zejd"d#d$ Zejdejdddgd%d& Zejddddgd'd( Zd)d* Zejd+edd,d-d. Zd/d0 Z ejd1dd d d d ddgd d d d ddgd d ddd2d2gfdd d d d ddgd d ddddgd d dddd2gfdd d d dddgd d ddddgd dddd2d2gfgejdd3d4 Z!ejd5dd6d7d8dgdd9d:dgdd;d6dgdd;d6dggfdd<d=d>d?gd<d=d>d?gd@dAdBdCgdDdEdFdGggfdd6d7d8dHgdd9d:dIgdd;d6dJgdd;d6dJggfgejdejdKdLdMdNgdOdP Z"ejdejddddgdQdR Z#dSdT Z$ejdUdd ddgfdd ddgfgdVdW Z%dXdY Z&ejdZej'ej(ej)gejd[dej(ej)gejdKdLdMdNgd\d] Z*ejd^ej'ej(ej)gejdKdLdMdNgd_d` Z+dadb Z,ejdcdMddde edD fdNdfde edD fdLdgde edD fgdhdi Z-ejddddgdjdk Z.ejdddgdldm Z/dS )n    N)clone)KBinsDiscretizerOneHotEncoder)assert_allcloseassert_allclose_dense_sparseassert_array_almost_equalassert_array_equal      ?      @g      g      @      ?   g      @   z!strategy, expected, sample_weightuniformZkmeansquantile   z0ignore:In version 1.5 onwards, subsample=200_000c                 C   s0   t dd| d}|jt|d t||t d S )Nr   ordinaln_binsencodestrategysample_weight)r   fitXr   	transform)r   expectedr   est r!   n/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/sklearn/preprocessing/tests/test_discretization.pytest_fit_transform   s    $r#   c                   C   sR   t ddt t tdgd dt t ddtjjttksNt	d S )Nr   r   r   )
r   fit_transformr   nparrayr   n_bins_dtypeintAssertionErrorr!   r!   r!   r"   test_valid_n_bins<   s    r,   r   c              	   C   sL   t jttd}td| d}d}tjt|d |jt|d W 5 Q R X dS )z=Check that we raise an error when the wrong strategy is used.)shaper   r   r   zK`sample_weight` was provided but it cannot be used with strategy='uniform'.matchr   N)	r&   Zoneslenr   r   pytestraises
ValueErrorr   )r   r   r    err_msgr!   r!   r"   1test_kbinsdiscretizer_wrong_strategy_with_weightsB   s    r6   c               	   C   s   t dd} t| d}d}tjt|d |t W 5 Q R X dddg} t| d}d}tjt|d |t W 5 Q R X ddddg} t| d}d}tjt|d |t W 5 Q R X d	dd	dg} t| d}d
}tjt|d |t W 5 Q R X d S )N)r             @r$   z:n_bins must be a scalar or array of shape \(n_features,\).r/   r   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 3. Number of bins must be at least 2, and must be an int.g @z{KBinsDiscretizer received an invalid number of bins at indices 0, 2. Number of bins must be at least 2, and must be an int.)r&   fullr   r2   r3   r4   r%   r   )r   r    r5   r!   r!   r"   test_invalid_n_bins_arrayN   s,    




r:   c                 C   s   t ddddgd| djt|d}t||t ttjd }|jj|fksRt	t
|j|jD ]\}}|j|d fks`t	q`d S )Nr   r   r   r   r   r   )r   r   r   r   r   r&   r'   r-   
bin_edges_r+   zipr(   )r   r   r   r    Z
n_features	bin_edgesr   r!   r!   r"   test_fit_transform_n_bins_arrayt   s    '
   r>   z&ignore: Bins whose width are too smallc                  C   s   t dgdgdgdgdgdgg} tddd	d
}|j| ddddddgd t|jd ddddg t|| dgdgdgdgdgdgg dS )z;Check the impact of `sample_weight` one computed quantiles.r	   r   r   r   i  i  
   r   r   r   r   r           g      ?r8   N)r&   r'   r   r   r   r;   r   r   r    r!   r!   r"   *test_kbinsdiscretizer_effect_sample_weight   s
    "rB   c                 C   sL   t dd| d}tjddddgtjd}t|}|jt|d t|| dS )	z7Make sure that `sample_weight` is not changed in place.r   r   r   r   r   r)   r   N)r   r&   r'   float64copyr   r   r   )r   r    r   Zsample_weight_copyr!   r!   r"   /test_kbinsdiscretizer_no_mutating_sample_weight   s
    
rF   c              	   C   s   t d tddgddgddgddgg}t| ddd}d	}tjt|d
 || W 5 Q R X |j	d dkstt
||}t|d d df t|jd  d S )Nalwaysr   r	   r   r   r   r   )r   r   r   z2Feature 0 is constant and will be replaced with 0.r/   )warningssimplefilterr&   r'   r   r2   warnsUserWarningr   r(   r+   r   r   Zzerosr-   )r   r   r    warning_messageXtr!   r!   r"   test_same_min_max   s    
"
rN   c               	   C   st   t d} tdd}tt ||  W 5 Q R X tdd}|| dd tt ||  W 5 Q R X d S )Nr7   r   r$   r   r   )	r&   aranger   r2   r3   r4   r   reshaper   rA   r!   r!   r"   test_transform_1d_behavior   s    


rQ   i	   c                 C   sd   t dddddgdd}t dddddgdd}|d	|   }td
dd|}t|| d S )Nr8         @g      @g       @g      $@r   r   r   r?   r   r   r   r   )r&   r'   rP   r   r%   r   )rR   ZX_initZXt_expectedr   rM   r!   r!   r"   test_numeric_stability   s
    rV   c                  C   s   t ddddgddt} | t}t ddddgddt} | t}t|rVtttdd dD d	d
	|| t ddddgddt} | t}t|stttdd dD dd
	|
 |
  d S )Nr   r   r   rU   onehot-densec                 S   s   g | ]}t |qS r!   r&   rO   .0rR   r!   r!   r"   
<listcomp>   s     z'test_encode_options.<locals>.<listcomp>)r   r   r   r   F)
categoriesZsparse_outputonehotc                 S   s   g | ]}t |qS r!   rX   rY   r!   r!   r"   r[      s     T)r   r   r   r   spissparser+   r   r   r%   Ztoarray)r    ZXt_1ZXt_2ZXt_3r!   r!   r"   test_encode_options   s4    

 
 r`   z8strategy, expected_2bins, expected_3bins, expected_5binsr7   c                 C   s   t ddddddgdd}td| d	d
}||}t||  td| d	d
}||}t||  td| d	d
}||}t||  d S )Nr   r   r   r   rS   r?   r   r   r   r   r   r      )r&   r'   rP   r   r%   r   Zravel)r   Zexpected_2binsZexpected_3binsZexpected_5binsr   r    rM   r!   r!   r"   test_nonuniform_strategies   s    


rc   zstrategy, expected_invg      r8   g      g      @g      rT   g      g      @g      g      g      g      @g      g      ?g      ?g      @g      g      ?g      r@   g      ?r   r   r]   rW   c                 C   s0   t d| |d}|t}||}t|| d S )Nr   ra   )r   r%   r   inverse_transformr   )r   r   Zexpected_invkbdrM   Xinvr!   r!   r"   test_inverse_transform  s    $

rg   c                 C   s   t ddddgd d d f }td| dd}|| t dd	gd d d f }||}t|jdd
d |j t|jdd
dg d S )Nr   r   r   r   r7   r   ra   r	   rb   )Zaxis)	r&   r'   r   r   r   r   maxr(   min)r   r   re   ZX2ZX2tr!   r!   r"    test_transform_outside_fit_rangeG  s    

rj   c                  C   s   t ddddgd d d f } |  }tddd}|| }t| | | }||}t|| t|t dgdgd	gd	gg d S )
Nr   r   r   r   r   rU   r   r
   r   )r&   r'   rE   r   r%   r   rd   )r   ZX_beforer    rM   Z	Xt_beforerf   r!   r!   r"   test_overwriteT  s    



rk   zstrategy, expected_bin_edgesc              	   C   sd   dgdgdgdgdgdgg}t d| d}d}tjt|d || W 5 Q R X t|jd | d S )Nr   r   r.   'Consider decreasing the number of bins.r/   )r   r2   rJ   rK   r   r   r;   )r   Zexpected_bin_edgesr   re   rL   r!   r!   r"   test_redundant_binsb  s    rm   c               	   C   s   t dddgdd} t ddddddg}t d	d	d
gdd}tdddd}d}tjt|d ||  W 5 Q R X t|j	d	 | t|
| | d S )Ng?gffffff?r   r   gq=
ףp?g=
ףp=?gzG?gp=
ף?r   r7   r?   r   r   r   rl   r/   )r&   r'   rP   r   r2   rJ   rK   r   r   r;   r   )r   r=   rM   re   rL   r!   r!   r"   !test_percentile_numeric_stabilityn  s    rn   in_dtype	out_dtypec                 C   sr   t jt| d}td||d}|| |d k	r4|}n"|d krP|jt jkrPt j}n|j}||}|j|ksnt	d S NrC   r   )r   r   r)   )
r&   r'   r   r   r   r)   float16rD   r   r+   )ro   rp   r   X_inputre   Zexpected_dtyperM   r!   r!   r"   test_consistent_dtype{  s    

rt   input_dtypec                 C   sd   t jt| d}td|t jd}|| ||}td|t jd}|| ||}t|| d S rq   )	r&   r'   r   r   float32r   r   rD   r   )ru   r   rs   Zkbd_32ZXt_32Zkbd_64ZXt_64r!   r!   r"   test_32_equal_64  s    



rw   c                  C   s   t ddddgdd} tdddd	}||  t|}|jd d
 ||  t|jd |jd D ]\}}t j	
|| qf|jj|jjkstd S )Nr	   r
   r   r   r   r?   r   r   r   	subsampler   )r&   r'   rP   r   r   r   
set_paramsr<   r;   testingr   r-   r+   )r   Zkbd_defaultZkbd_without_subsamplingZbin_kbd_defaultZbin_kbd_with_subsamplingr!   r!   r"   'test_kbinsdiscretizer_subsample_default  s    

 r|   zencode, expected_namesc                 C   s.   g | ]&}t d D ]}d| dt| qqS r7   feat_rangefloatrZ   col_idZbin_idr!   r!   r"   r[     s   
 r[   c                 C   s.   g | ]&}t d D ]}d| dt| qqS r}   r   r   r!   r!   r"   r[     s   
 c                 C   s   g | ]}d | qS r~   r!   )rZ   r   r!   r!   r"   r[     s     c                 C   s   dddgdddgdddgdd	dgg}t d	| d
|}||}dd tdD }||}|jd |jd ksttt|| dS )z[Check get_feature_names_out for different settings.
    Non-regression test for #22731
    r	   r   r   r   r   r   r   r   r7   rU   c                 S   s   g | ]}d | qS r   r!   rY   r!   r!   r"   r[     s     z>test_kbinsdiscrtizer_get_feature_names_out.<locals>.<listcomp>N)r   r   r   r   Zget_feature_names_outr-   r+   r   )r   Zexpected_namesr   re   rM   Zinput_featuresZoutput_namesr!   r!   r"   *test_kbinsdiscrtizer_get_feature_names_out  s    $

r   c                 C   sj   t j|dd }t| d|d}|| t|}|jd d || t|j	d |j	d dd d S )	N)i r   r   iP  )r   ry   random_staterx   r   g{Gz?)Zrtol)
r&   randomRandomStaterandom_sampler   r   r   rz   r   r;   )r   Zglobal_random_seedr   Zkbd_subsamplingZkbd_no_subsamplingr!   r!   r"   test_kbinsdiscretizer_subsample  s      

  r   c              	   C   sF   t jdd}t| dd}tjtdd || W 5 Q R X d S )Nr   )d   r   )r   r   z)subsample=200_000 will be used by defaultr/   )	r&   r   r   r   r   r2   rJ   FutureWarningr   )r   r   re   r!   r!   r"   test_kbd_subsample_warning  s    r   )0rH   numpyr&   r2   Zscipy.sparsesparser^   Zsklearnr   Zsklearn.preprocessingr   r   Zsklearn.utils._testingr   r   r   r   r   markZparametrizefilterwarningsr#   r,   r6   r:   r>   rB   rF   rN   rQ   r   rV   r`   rc   rg   rj   rk   rm   rn   rr   rv   rD   rt   rw   r|   r   r   r   r!   r!   r!   r"   <module>   s&  ,000*
*
*
*
*
"

&000*
*
*
%



	


...	














!

 
		

