U
    -e                     @   s   d Z ddlmZ ddlmZ ddlZddlZddlm	Z
 ddlmZmZmZ ddlmZ ddlmZ dd	 Zd
d Zdd Zdd Zdd Zdd Zdd ZdS )zTest the 20news downloader, if the data is available,
or if specifically requested via environment variable
(e.g. for CI jobs).    )partial)patchN)check_as_framecheck_pandas_dependency_messagecheck_return_X_y	normalize)assert_allclose_dense_sparsec           	      C   s,  | ddd}|j dst| d|jddd dd}|j|jdd  ksNtt|j d	d
gksjtt|j	t|jkstt|j	t|j
kst|j
d	 }|j|jd	  }|j|}|j
t|j|kd	 d	  }||kst| dddd\}}t|t|j
kst|j|jjks(td S )NallF)subsetshuffle.. _20newsgroups_dataset:)r   
categoriesr   r      T)r   r   Z
return_X_y)DESCR
startswithAssertionErrorZtarget_namesnpuniquetargettolistlen	filenamesdataindexwhereshape)	fetch_20newsgroups_fxtr   Z	data2catsZentry1categorylabelZentry2Xy r%   c/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/sklearn/datasets/tests/test_20news.pytest_20news   s&      
r'   c                 C   s\   | dd}t |d t |jks$tt |d t |jks>tt |d t |jksXtdS )zuChecks the length consistencies within the bunch

    This is a non-regression test for a bug present in 0.16.1.
    r
   r   r   r   r   N)r   r   r   r   r   )r    r   r%   r%   r&   test_20news_length_consistency4   s    
r)   c                 C   sj  | dd}t |jr"|jjdks&t|jjdks6t|jjd dksJt|jjtj	ks\t|j
dslt| dd}t |jr|jjdkst|jjd	kst|jjd d
kst|jjtj	kst|j
dstt| dd}t|| | dd}t |jr|jjdkst|jjdks*t|jjd dks@t|jjtj	ksTt|j
dsftd S )Ntrainr(   Zcsr)2,  ; r   r+   r   test)l  r,   r.   r
   )I  r,   r/   )spissparser   formatr   r   r   Zdtyper   Zfloat64r   r   r   r   )!fetch_20newsgroups_vectorized_fxtbunchZ
fetch_funcr%   r%   r&   test_20news_vectorized@   s(    



 r5   c                 C   sf   | dd}| dd}|d d d }|d d d }t |t| ttjj| dddsbtd S )NFr   Tr   d   r   )Zaxis)r	   r   r   ZallcloseZlinalgZnormZtodenser   )r3   r#   ZX_ZX_normr%   r%   r&   test_20news_normalization^   s    

r7   c                    s   t d | dd}t||  |j}|jdks2tt fdd|jjD sPtdD ]}||	 ksTtqTd|	 kszt|j
jdkstd S )	NZpandasTZas_frame)r+   i< c                    s   g | ]}t | jqS r%   )
isinstanceZSparseDtype).0colpdr%   r&   
<listcomp>p   s     z(test_20news_as_frame.<locals>.<listcomp>)beginnerZ	beginnersZ	beginningZ
beginningsZbeginsZbegleyZbegoneZcategory_class)pytestZimportorskipr   framer   r   r
   r   Zdtypeskeysr   name)r3   r4   rA   Zexpected_featurer%   r<   r&   test_20news_as_frameh   s    


	rD   c                 C   s   t |  d S )N)r   )r3   Zhide_available_pandasr%   r%   r&   test_as_frame_no_pandas   s    rE   c                 C   s`   t dN}t d:}d|_d|_d}tjt|d | dd W 5 Q R X W 5 Q R X W 5 Q R X d S )Nzos.path.existszjoblib.loadT)r#   r$   zThe cached dataset located in)matchr8   )r   Zreturn_valuer@   Zraises
ValueError)r3   Zmock_is_existZ	mock_loaderr_msgr%   r%   r&   test_outdated_pickle   s    

rI   )__doc__	functoolsr   Zunittest.mockr   numpyr   r@   Zscipy.sparsesparser0   Z"sklearn.datasets.tests.test_commonr   r   r   Zsklearn.preprocessingr   Zsklearn.utils._testingr	   r'   r)   r5   r7   rD   rE   rI   r%   r%   r%   r&   <module>   s    
