U
    -e9/                     @   s  d Z ddlZddlmZmZ ddlZddlZddlm	Z	m
Z
 ddlmZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZmZ e Ze Z dd Z!dd Z"dd Z#dd Z$dd Z%dd Z&dd Z'ej()dddgdd Z*dd  Z+d!d" Z,d#d$ Z-d%d& Z.d'd( Z/ed)ef d*d+id,ej()d-d.d/gd0d1 Z0ed)ef d*d2id,ej()d-d.d/gd3d4 Z1d5d6 Z2d7d8 Z3d9d: Z4d;d< Z5dS )=zD
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
    N)Mockpatch)
csc_matrix
csr_matrix)load_diabetes	load_irismake_classification)IsolationForest)_average_path_length)roc_auc_score)ParameterGridtrain_test_split)check_random_state)assert_allcloseassert_array_almost_equalassert_array_equalignore_warningsc              	   C   s   t ddgddgg}t ddgddgg}tdgdddgddgd	}t . |D ]"}tf d
| i||| qTW 5 Q R X dS )z6Check Isolation Forest for various parameter settings.r                  ?      ?TF)n_estimatorsmax_samples	bootstraprandom_stateN)nparrayr   r   r	   fitpredict)global_random_seedX_trainX_testgridparams r%   d/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/sklearn/ensemble/tests/test_iforest.pytest_iforest"   s    r'   c                 C   s   t | }ttjdd |d\}}tddgddgd}ttfD ]p}||}||}|D ]V}tf d	| d
||}	|		|}
tf d	| d
||}|	|}t
|
| qXq@dS )z=Check IForest for various parameter settings on sparse input.N2   r   r   r   TF)r   r   
   )r   r   )r   r   diabetesdatar   r   r   r	   r   r   r   )r    rngr!   r"   r#   Zsparse_formatZX_train_sparseZX_test_sparser$   Zsparse_classifierZsparse_resultsZdense_classifierZdense_resultsr%   r%   r&   test_iforest_sparse2   s4     
 
r.   c               	   C   s   t j} d}tjt|d tdd|  W 5 Q R X t " t	dt tdd|  W 5 Q R X t ( t	dt tt
dd|  W 5 Q R X tt( t | | ddd	df  W 5 Q R X dS )
z7Test that it gives proper exception on deficient input.3max_samples will be set to n_samples for estimationmatch  r   errorautor   Nr   )irisr,   pytestwarnsUserWarningr	   r   warningscatch_warningssimplefilterr   Zint64Zraises
ValueErrorr   )Xwarn_msgr%   r%   r&   test_iforest_errorL   s    

 r@   c               	   C   sF   t j} t | }|jD ](}|jttt	| j
d kstqdS )zDCheck max_depth recalculation when max_samples is reset to n_samplesr   N)r6   r,   r	   r   estimators_	max_depthintr   ceillog2shapeAssertionError)r>   clfZestr%   r%   r&   test_recalculate_max_depthb   s    
rI   c               	   C   s   t j} t | }|j| jd ks&ttdd}d}tjt	|d ||  W 5 Q R X |j| jd kslttdd| }|jd| jd  kstd S )Nr   i  r3   r/   r0   g?)
r6   r,   r	   r   max_samples_rF   rG   r7   r8   r9   )r>   rH   r?   r%   r%   r&   test_max_samples_attributej   s    
rK   c                 C   s   t | }ttj|d\}}td| d|}|jdd ||}|jdd ||}t|| td| d|}||}t|| dS )zCheck parallel regression.r)   r   )n_jobsr   r   )rL   r   N)	r   r   r+   r,   r	   r   
set_paramsr   r   )r    r-   r!   r"   Zensembley1y2Zy3r%   r%   r&    test_iforest_parallel_regressiony   s    



rP   c           	      C   s   t | }d|dd }|t|d |d f}|dd }|jdddd	}t|dd |f}td
gd dgd  }td|d|}|	| }t
||dkstdS )z#Test Isolation Forest performs wellg333333?iX  r   Nr2   r   )   r   )lowhighsizer   rR   d   )r   r   g\(\?)r   randnZpermutationr   Zvstackuniformr   r	   r   decision_functionr   rG   )	r    r-   r>   r!   Z
X_outliersr"   y_testrH   Zy_predr%   r%   r&   test_iforest_performance   s    r[   contamination      ?r5   c              	   C   s   ddgddgddgddgddgddgddgddgg}t || d	}|| || }||}t|dd  t|d d kstt|d
dg ddg   d S )NrQ   r   r         	   r   r\      )	r	   r   rY   r   r   minmaxrG   r   )r\   r    r>   rH   Zdecision_funcpredr%   r%   r&   test_iforest_works   s    4

(rh   c                  C   s&   t j} t | }|j|jks"td S N)r6   r,   r	   r   rJ   Z_max_samplesrG   )r>   rH   r%   r%   r&   test_max_samples_consistency   s    rj   c                  C   sV   t d} ttjd d tjd d | d\}}}}tdd}||| || d S )Nr   r(   r)   g?)Zmax_features)r   r   r+   r,   targetr	   r   r   )r-   r!   r"   Zy_trainrZ   rH   r%   r%   r&    test_iforest_subsampled_features   s      
rl   c                  C   s   dt dt j  d } dt dt j  d }ttdgdg ttdgdg ttd	gd
g ttdg| g ttdg|g ttt dd	ddgdd
| |g tt d}t|t | d S )N       @g      @g?g     0@g}?r   g        r   r   r      i  )	r   logZeuler_gammar   r
   r   Zaranger   sort)Z
result_oneZ
result_twoZavg_path_lengthr%   r%   r&    test_iforest_average_path_length   s    
rq   c                  C   s   ddgddgddgg} t dd| }t  | }t|ddgg|ddgg|j  t|ddgg|ddgg|j  t|ddgg|ddgg d S )Nr   r   g?)r\   rm   )r	   r   r   Zscore_samplesrY   Zoffset_)r!   Zclf1Zclf2r%   r%   r&   test_score_samples   s     rr   c                  C   sv   t d} | dd}tdd| dd}|| |jd }|jdd || t|jdks`t|jd |ksrtdS )	z/Test iterative addition of iTrees to an iForestr      r   r*   T)r   r   r   Z
warm_start)r   N)r   rW   r	   r   rA   rM   lenrG   )r-   r>   rH   Ztree_1r%   r%   r&   test_iforest_warm_start   s       


ru   z*sklearn.ensemble._iforest.get_chunk_n_rowsZreturn_valuer   )Zside_effectzcontamination, n_predict_calls)r]   r   )r5   r   c                 C   s   t || | j|kstd S ri   rh   Z
call_countrG   Zmocked_get_chunkr\   Zn_predict_callsr    r%   r%   r&   test_iforest_chunks_works1  s    
rx   r*   c                 C   s   t || | j|kstd S ri   rv   rw   r%   r%   r&   test_iforest_chunks_works2  s    
ry   c                  C   s|  t d} t }||  t jd}t|| dks<tt||	dddksZtt|| d dksttt|| d dkstt 
|	dddd} t }||  t|| dkstt||	dddkstt|t ddkst|	dd} t }||  t|| dks:tt||	dddksZtt|t ddksxtdS )z=Test whether iforest predicts inliers when using uniform data)rV   r*   r   r   rV   r*   N)r   Zonesr	   r   randomRandomStateallr   rG   rW   repeat)r>   Ziforestr-   r%   r%   r&   test_iforest_with_uniform_data  s(    



 r~   c                  C   s2   t dddd\} }t| } tdddd|  d	S )
zdCheck that Isolation Forest does not segfault with n_jobs=2

    Non-regression test for #23252
    iL rV   r   )Z	n_samplesZ
n_featuresr   r*      r   )r   r   rL   N)r   r   r	   r   )r>   _r%   r%   r&   *test_iforest_with_n_jobs_does_not_segfault=  s    r   c               	   C   s^   t ddgddgg} t ddg}t }|| | d}tjt|d |j W 5 Q R X d S )Nr   r   r   r`   r   zoAttribute `base_estimator_` was deprecated in version 1.2 and will be removed in 1.4. Use `estimator_` instead.r0   )r   r   r	   r   r7   r8   FutureWarningZbase_estimator_)r>   ymodelr?   r%   r%   r&   'test_base_estimator_property_deprecatedH  s    r   c               	   C   sf   t d} tjd}| j|ddgd}tddd}t	  t
dt || W 5 Q R X d	S )
zCheck that feature names are preserved when contamination is not "auto".

    Feature names are required for consistency checks during scoring.

    Non-regression test for Issue #25844
    Zpandasr   r`   a)r,   columnsg?rc   r4   N)r7   Zimportorskipr   rz   r{   Z	DataFramerW   r	   r:   r;   r<   r9   r   )pdr-   r>   r   r%   r%   r&   #test_iforest_preserve_feature_namesV  s    

r   )6__doc__r:   Zunittest.mockr   r   numpyr   r7   Zscipy.sparser   r   Zsklearn.datasetsr   r   r   Zsklearn.ensembler	   Zsklearn.ensemble._iforestr
   Zsklearn.metricsr   Zsklearn.model_selectionr   r   Zsklearn.utilsr   Zsklearn.utils._testingr   r   r   r   r6   r+   r'   r.   r@   rI   rK   rP   r[   markZparametrizerh   rj   rl   rq   rr   ru   rx   ry   r~   r   r   r   r%   r%   r%   r&   <module>   sX   
"