U
    -ep                 %   @   s  d Z ddlZddlZddlmZmZmZ ddlZddl	Z	ddl
mZ ddlmZmZmZmZ ddlmZ ddlmZmZ ddlmZ dd	lmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ dd
l,m-Z-m.Z.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7m8Z8m9Z9m:Z: ddl;m<Z< e e& e( e e dde$ e'dde!ddddge" e# g
Z=e edde% e e gZ>e=e> Z?e@dZAeBdd ZCeeDdZEeFddddddddddddgeFddddddddddddgeFdddddddddddddddgeFddddddddddddddddgddddddddddddgddddddddddddgfZGe ZHe:dd ZIdd  ZJdd!d"ZKd#d$ ZLd%d& ZMd'd( ZNd)d* ZOd+d, ZPe	jQRd-d.d/ge	jQRd0ddd1d2d3d4dge	jQRd5e&e%gd6d7 ZSe	jQRd-d.d/ge	jQRd0dd1d2ge	jQRd5e&e%gd8d9 ZTd:d; ZUe	jQRd5e&e%gd<d= ZVd>d? ZWe	jQRd5ee&e%gd@dA ZXdBdC ZYdDdE ZZdFdG Z[dHdI Z\e	jQRdJeFdgd1 dgd1  eFddddddddddd1d1ge]ddgddgddggfeFdgd4 dgd  eFdddddddddddd1ge]dKdLgdKdLgdKdLggfgdMdN Z^e	jQRdOdPdQdRdSge	jQRdTddUdVgdWdX Z_dYdZ Z`e	jQRd[e$e'ge	jQRd\d]d^d_gd`da Zae	jQRd\dbdcddgdedf Zbe:dgdh Zcdidj Zddkdl Zedmdn Zfdodp Zgdqdr Zhdsdt Zidudv Zjdwdx Zkdydz Zld{d| Zmd}d~ Zndd Zoe:dd Zpe	jQRde"e#gdd Zqdd Zrdd Zsdd Ztdd Zudd Zve	jQRd\dd^d_gdd Zwdd Zxdd Zye:dd Zzdd Z{dd Z|dd Z}e	jQRddddddddgdd Z~dd Zdd Zdd Zdd Zdd Ze	jQRd5ee%gdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Ze	jQRde$ee'fddń ZddǄ ZddɄ Zdd˄ Ze	jQRdee&e%fdd΄ Ze	jQRde d/fed/ddэd/fe& d/fe&d/ddэd/fe%d/ddэd/fe% d/fe"ddҍd/fe#ddҍd/fe$ddҍd/feddҍd/fe'ddҍd/fe d/fe( d/fe d/fe d/feddd/fe ddd/fed/ddэd.fed/ddэd.fe&d/ejddэd.fe&d/ejddэd.fe"ddҍd.fe"ejddҍd.fe#ddҍd.fe#ejddҍd.fe$ddҍd.fe$ejddҍd.feddҍd.feejddҍd.fe'ddҍd.fe'ejddҍd.fgddԄ Ze	jQjRde?ddׄ e?D d؍ddڄ Ze	jQjRde?ddׄ e?D d؍dd݄ ZdS )zTest the split module    N)combinationscombinations_with_replacementpermutations)stats)
coo_matrix
csc_matrix
csr_matrixissparse)comb)load_digitsmake_classification)DummyClassifier)GridSearchCV
GroupKFoldGroupShuffleSplitKFoldLeaveOneGroupOutLeaveOneOutLeavePGroupsOut	LeavePOutPredefinedSplitRepeatedKFoldRepeatedStratifiedKFoldShuffleSplitStratifiedGroupKFoldStratifiedKFoldStratifiedShuffleSplitTimeSeriesSplitcheck_cvcross_val_scoretrain_test_split)_build_repr_validate_shuffle_split_yields_constant_splits)SVC)assert_request_is_empty)MockDataFrame)assert_allcloseassert_array_almost_equalassert_array_equalignore_warnings)_num_samples   p      ?	test_size   n_groups
            123c            "      C   s<  d} d}d}d}d}t ddgddgddgdd	gg}t ddddg}t ddddg}t ddddg}t }	t|}
t|}t|}t }t|}td
d}t	ddddg}t
|}d}d}d}d}d}d}d}d}d}| t| ||||t|||d|g	}tt|	|
|||||||g	|||||||||g	D ]\}\}}|| ||||ksZtt jt||||t|||| ||||D ]:\}} t |jjdkstt | jjdkstq|t|ks2tq2d}!tjt|!d |	d || W 5 Q R X tjt|!d |
d || W 5 Q R X d S )Nr8   r,   r5   r2   r7   r6            r   random_statezLeaveOneOut()zLeavePOut(p=2)z3KFold(n_splits=2, random_state=None, shuffle=False)z=StratifiedKFold(n_splits=2, random_state=None, shuffle=False)LeaveOneGroupOut()LeavePGroupsOut(n_groups=2)zJShuffleSplit(n_splits=10, random_state=0, test_size=None, train_size=None)z.PredefinedSplit(test_fold=array([1, 1, 2, 2]))zBStratifiedGroupKFold(n_splits=2, random_state=None, shuffle=False)iz%The 'X' parameter should not be None.match)nparrayr   r   r   r   r   r   r   r   r   r
   	enumeratezipget_n_splitsAssertionErrortestingassert_equallistsplitasarraydtypekindreprpytestraises
ValueError)"	n_samplesZn_unique_groupsn_splitsr.   Zn_shuffle_splitsXZX_1dygroupsZlooZlpokfskfloloZloposspssgkfZloo_reprZlpo_reprZkf_reprZskf_reprZ	lolo_reprZ	lopo_reprZss_reprZps_reprZ	sgkf_reprZn_splits_expectedrC   cvZcv_reprtraintestmsg rf   i/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/sklearn/model_selection/tests/test_split.py(test_cross_validator_with_default_params]   s    "
 rh   c                  C   sJ  d} t jd}|jdd| dfd}|jdd| fd}|dd}|jdd| dfd}|jdd| fd}t tddt t t	 t
 t t td	d
t t tddtddt t|dg}|D ]}t|||| t|||| zt|||| W q tk
rB }	 z$d}
d|
}|t|	ks2tW 5 d }	~	X Y qX qd S )N   r2   r   r7   r,   )sizer-   r/   r0   r3   rX   )Z	test_fold)binaryZ
multiclassz/Supported target types are: {}. Got 'multilabel)rF   randomRandomStaterandintreshaper   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rN   rO   rV   formatstrrK   )rW   rngrY   rZ   Zy_2dy_multilabelr[   Z	splittersZsplittereZallowed_target_typesre   rf   rf   rg   	test_2d_y   sD    rw   c                 C   sL   t | t | } }| |t  ks&t|d k	rH| |t t|ksHtd S N)setintersectionrK   unionrange)rc   rd   rW   rf   rf   rg   check_valid_split   s    r}   c           
      C   s   t |}| ||||kstt }d}| |||D ](\}}	t||	|d |d7 }||	 q6||kslt|d k	r|tt|kstd S )Nr   )rW   r2   )r+   rJ   rK   ry   rO   r}   updater|   )
rb   rY   rZ   r[   expected_n_splitsrW   Zcollected_test_samplesZ
iterationsrc   rd   rf   rf   rg   check_cv_coverage   s    r   c               	   C   sB  t ddgddgddgg} t ddgddgddgddgd	d
gg}tttd| f t dddddg}td}tjt	dd t||| W 5 Q R X t
d}t t|}tjt	dd t|||| W 5 Q R X t " td t|||d dd W 5 Q R X t " td t||||dd W 5 Q R X t dddddg}tt t||| W 5 Q R X tt t||| W 5 Q R X tt td W 5 Q R X tt td W 5 Q R X d}tjt|d td W 5 Q R X tjt|d td W 5 Q R X tjt|d t
d W 5 Q R X tjt|d t
d W 5 Q R X tt td W 5 Q R X tt td W 5 Q R X tt td W 5 Q R X tt td W 5 Q R X tt t
d W 5 Q R X tt t
d W 5 Q R X tt tdd d W 5 Q R X d S )Nr2   r,   r7   r8   r6   r<   r=   r>   	   r5   rk   zThe least populated classrD   ignorer[   r   r   z>k-fold cross-validation requires at least one train/test split      ?       @rX   shuffle)rF   rG   rV   nextr   rO   r   rT   ZwarnsWarningr   arangelenwarningscatch_warningssimplefilterr   rU   	TypeError)X1X2rZ   Zskf_3Zsgkf_3Znaive_groupsZerror_stringrf   rf   rg   test_kfold_valueerrors   sb    (



r   c                  C   sb   t d} td}t|| d d dd t d}td}t||d d dd dtd|ks^td S )N   r7   )rZ   r[   r      r6   )rF   onesr   r   rJ   rK   )r   r\   r   rf   rf   rg   test_kfold_indicesA  s    

r   c                  C   s   ddgddgddgddgd	d
gg} t d| d d }t|\}}t|ddg t|ddg t|\}}t|ddg t|ddg t d| }t|\}}t|dddg t|ddg t|\}}t|ddg t|dddg d S )Nr2   r,   r7   r8   r6   r<   r=   r>   r   r5   rk   r   )r   rO   r   r)   )r   splitsrc   rd   rf   rf   rg   test_kfold_no_shuffleQ  s    "r   c                  C   s  t dddddg } }td| |}t|\}}t|ddg t|ddg t|\}}t|ddg t|ddg t ddddddddg } }td| |}t|\}}t|ddddg t|dddg t|\}}t|dddg t|ddddg dtd| |kstt d} d	d	d	d
d
d
d
g}dddddddg}t j	t
td| |t
td| | ddddddddg}t |} t j	t
td| |t
td| | d S )Nr8   r2   r   r,   r7   r=   r6   r<   r9   0)rF   r   r   rO   r   r)   rJ   rK   rL   rM   rN   	ones_liker   )rY   rZ   r   rc   rd   y1y2rf   rf   rg    test_stratified_kfold_no_shuffleh  s<    
 
 r   r   FTkr<   r=   r>   r   kfoldc                 C   s  d}t |}t dgtd|  dgtd|   dgtd|   }t t|}t |t| }g }|svd nd}	|| |	|d}
|
j|||d	D ]V\}}tt || t| |d
d tt || t| |d
d |	t| qt 
|dkstd S )N  r8   皙?r   {Gz?r2   {Gz?r@   r   r[   {Gz?Zatol)rF   r   rG   intr   r   bincountrO   r'   appendptprK   )r   r   r   rW   rY   rZ   r[   distr
test_sizesr@   r]   rc   rd   rf   rf   rg   test_stratified_kfold_ratios  s&    
  r   c           
         s   d}t dgtd|  dgtd|   dgtd|   }t t| t t| fdd	}||}tdddgD ]$}t ||}||}	|	|kstqd S )
Nd   r,   r   r   r   r2   r   c                    s0   sd nd}dd |dj  | dD S )Nr   c                 S   s    g | ]\}}t |t |fqS rf   )rN   ).0rc   rd   rf   rf   rg   
<listcomp>  s   zNtest_stratified_kfold_label_invariance.<locals>.get_splits.<locals>.<listcomp>r   r   )rO   )rZ   r@   rY   r[   r   r   r   rf   rg   
get_splits  s        z:test_stratified_kfold_label_invariance.<locals>.get_splits)	rF   rG   r   r   r   r   r   ZtakerK   )
r   r   r   rW   rZ   r   Zsplits_basepermZy_permZsplits_permrf   r   rg   &test_stratified_kfold_label_invariance  s     	r   c                  C   sf   t ddD ]V} tdjt| d}dd |D }t|t| dksNtt|| ks
tq
d S )N   r   r6   rY   c                 S   s   g | ]\}}t |qS rf   r   r   _rd   rf   rf   rg   r     s     z&test_kfold_balance.<locals>.<listcomp>r2   )	r|   r   rO   rF   r   maxminrK   sum)rC   r\   sizesrf   rf   rg   test_kfold_balance  s
    r   c           	   	   C   s   t d}dgd dgd  }t t|}dD ]}| d|d}tddD ]f}||d | |d | |d | }d	d
 |D }t |t | dkstt 	||ksJtqJq0d S )Nr   r   r7   r2      TFr   r   c                 S   s   g | ]\}}t |qS rf   r   r   rf   rf   rg   r     s     z0test_stratifiedkfold_balance.<locals>.<listcomp>)
rF   r   r   r   r|   rO   r   r   rK   r   )	r   rY   rZ   r[   r   rb   rC   r]   r   rf   rf   rg   test_stratifiedkfold_balance  s    
&r   c                  C   s   t d} t dddd}t dddd}td}td}t| |||||D ]T\\}}\}}\}	}
t|||	fdD ]$\}}tt||t|ks|t	q|d||< qVt
|dkst	d S )Nr7   Tr   r   r@   r2   ,  r,   )r   rF   r   zerosrI   rO   r   r   intersect1drK   r   )r\   kf2Zkf3rY   Z	all_foldsZtr1Zte1Ztr2Zte2Ztr3Zte3Ztr_aZtr_brf   rf   rg   test_shuffle_kfold  s    

  
r   c                 C   s  t d}dgd dgd  }t t|}t d}dgd dgd  }t t|}| dddd	}t jt||||t|||| | ddt j	dd	}t
||f||f||fD ]P}t
|j| |j| D ]4\\}	}
\}	}tt t j|
| W 5 Q R X qqd S )
N   r   r=   r2   r>      r7   Tr   )rF   r   r   r   rL   rM   rN   rO   rn   ro   rI   rT   rU   rK   r)   )r   rY   rZ   Zgroups_1r   r   Zgroups_2r\   datar   Ztest_aZtest_brf   rf   rg   2test_shuffle_kfold_stratifiedkfold_reproducibility  s     

 &r   c                  C   s  t d} dgd dgd  }tdddd}tdddd}t|| ||| |D ]$\\}}\}}t|t|ksTtqTt|| |d dd t d	}dgd dgd  }tdddd}tdddd}t	d
d |||D }	t	dd |||D }
|	|
kstd S )N(   r      r2   r6   Tr   r   r5   c                 S   s   g | ]}t |d  qS r2   tupler   srf   rf   rg   r   4  s     z0test_shuffle_stratifiedkfold.<locals>.<listcomp>c                 S   s   g | ]}t |d  qS r   r   r   rf   rf   rg   r   5  s     )
rF   r   r   rI   rO   ry   rK   r   r   sorted)ZX_40rZ   Zkf0Zkf1r   Ztest0test1rY   r   Z	test_set1Z	test_set2rf   rf   rg   test_shuffle_stratifiedkfold"  s    
*
r   c                  C   s   t jd d t jd d  } }tddd}d}t|dd}t|| ||d }d	|ksZt|d
ksftt|ddd}t|| ||d }|d	kstt|ddd}t|| ||d }|d	kstt|}t|| ||d }d|kst|d
kstd S )NiX  r5   g{Gzt?)Cgammar7   Fr   rb   gq=
ףp?皙?Tr   r   r2   gGz?)	digitsr   targetr$   r   r   meanrK   r   )rY   rZ   modelrX   rb   Z
mean_scorerf   rf   rg   1test_kfold_can_detect_dependent_samples_on_digits9  s"    		r   c                  C   s   t dd} tdgd dgd  }t|dd}td}t|t| }g }| |||D ]t\}}t	|| || j
dksttt|| t| |d	d
 tt|| t| |d	d
 |t| qdt|dkstd S )Nr7   rl   r2   r<   r      rk   )r2   r,   r7   r8   r6   r<   r2   r2   r,   r,   r7   r7   r8   r8   r6   r6   r<   r<   r   r   )r   rF   rG   r   rq   rP   r   r   rO   r   rj   rK   r'   r   r   )ra   rZ   rY   r[   r   r   rc   rd   rf   rf   rg   #test_stratified_group_kfold_triviale  s    

  r   c            
      C   s  t dd} tdgd dgd  }t|dd}tddddd	d	dddddd	d
d
d
dddg}tddgddgddgg}g }t| ||||D ]\\\}}}t|| || j	dkst
t|| t| }	t|	|dd |t| qt|dks
t
d S )Nr7   rl   r2   r<   r   r   rk   r,   r8   r6   g-?gsh|??gZd;O?gZd;O?r/   MbP?r   )r   rF   rG   r   rq   rP   rI   rO   r   rj   rK   r   r   r'   r   r   )
ra   rZ   rY   r[   expectedr   rc   rd   expect_dist
split_distrf   rf   rg   'test_stratified_group_kfold_approximatew  s    
. r   zy, groups, expected      ?      ?c           	      C   s   t dd}t| dd}t||| ||D ]N\\}}}t|| || jdksZtt	| | t
| }t||dd q0d S )Nr7   rl   rk   r2   r   r   r   )r   rF   r   rq   rI   rO   r   rj   rK   r   r   r'   )	rZ   r[   r   ra   rY   rc   rd   r   r   rf   rf   rg   .test_stratified_group_kfold_homogeneous_groups  s    
 r   	cls_distr)g?333333?)g333333?ffffff?)皙?r   )r   r   r4   ri   F   c                 C   s*  d}t |d}t|d}tjd}d}|jd|| d}t|dd}|||}	|j|||	d	}
|j|||	d	}d}d}t	|
|D ]~\\}}\}}t
|	| |	| jdkstt|| t| }t|| t| }|tj|| d
7 }|tj|| d
7 }q|| }|| }||ks&td S )Nr6   rl   r   r   r,   )rj   r.   rk   r2   r   )Zqk)r   r   rF   rn   ro   choicer   rq   rO   rI   r   rj   rK   r   r   r   Zentropy)r   r4   rX   ra   Zgkfrt   Zn_pointsrZ   rY   gZ
sgkf_foldsZ	gkf_foldsZ	sgkf_entrZgkf_entrZ
sgkf_trainZ	sgkf_testr   Zgkf_testZ
sgkf_distrZ	gkf_distrrf   rf   rg   /test_stratified_group_kfold_against_group_kfold  s*    

r   c                  C   s   t dddt} t dddt}t tdddt}t tdddt}t| |||D ]x\}}}}t|d |d  t|d |d  t|d |d  t|d |d  t|d |d  t|d |d  q`d S )Nr   r   r1   r@   r,   r2   )r   rO   rY   rF   Zint32r   rI   r)   )Zss1Zss2Zss3Zss4t1t2t3Zt4rf   rf   rg   test_shuffle_split  s    r   split_classztrain_size, exp_train, exp_test)Nr   r2   )r>   r>   r,   )r   r>   r,   c                 C   sR   t d}t d}t| |d||\}}t||ks>tt||ksNtd S Nr5   
train_size)rF   r   r   rO   r   rK   )r   r   	exp_trainexp_testrY   rZ   X_trainX_testrf   rf   rg   $test_shuffle_split_default_test_size  s
    

r   )Nr>   r,   )r=   r=   r7   )r   r=   r7   c                 C   s\   t d}t d}td}tt| d|||\}}t||ksHtt||ksXtd S r   )rF   r   r|   r   r   rO   r   rK   )r   r   r   rY   rZ   r[   r   r   rf   rf   rg   *test_group_shuffle_split_default_test_size  s    

r  c                  C   s0  t d} t dddddddg}tt ttddd| | W 5 Q R X tt ttddd| | W 5 Q R X tt  ttdddd| | W 5 Q R X t d	} t dddddddddg	}tt ttdd
| | W 5 Q R X tt ttdd| | W 5 Q R X d S )Nr=   r   r2   r,   r7   r   r0   r1   r   r   r   )	rF   r   rP   rT   rU   rV   r   r   rO   rY   rZ   rf   rf   rg   "test_stratified_shuffle_split_init  s    
""$
 r  c                  C   s   t dddddddddddddddg} d}d}td||ddt t| | }|D ](\}}t||ksntt||ksVtqVd S )	Nr   r2   r,   r7   r6   r5   r<   )r1   r   r@   )rF   rG   r   rO   r   r   rK   )rZ   r1   r   sssrc   rd   rf   rf   rg   0test_stratified_shuffle_split_respects_test_size  s    (    r  c            	      C   s4  t ddddddddddddgt ddddddddddddgt dddddddddddddddgd t ddddddddddddddddgt dgd dgd  t d	d
 tdD ddddddddddddgddddddddddddgg} | D ]0}tddddt t||}t |}t 	dt| }t|| }|D ]\}}t
t || t ||  t t j|| ddd tt||  }t t j|| ddd tt||  }t||d t|t| |jkstt||kstt||kstt
t ||g  qNqd S )Nr2   r,   r7   r   r8   rk   i   2   c                 S   s   g | ]}|gd |  qS )r   rf   )r   rC   rf   rf   rg   r     s     z6test_stratified_shuffle_split_iter.<locals>.<listcomp>r   r9   r:   r;   r<   gQ?r   T)Zreturn_inverse)rF   rG   Zconcatenater|   r   rO   r   r   Z
asanyarrayceilr)   uniquer   floatr(   rj   rK   r   )	ZysrZ   r  r1   r   rc   rd   Zp_trainZp_testrf   rf   rg   "test_stratified_shuffle_split_iter  s<      *(
 


r  c                     s  d} d  fdd}dD ]x}t |d ddg }t d	|  dd
}dg| }dg| }d}|jt ||dD ]D\}}	|d7 }||f||	ffD ]"\}
}|D ]}|
|  d7  < qqqr| kstt|d	|  d	d	|   d\}}t||kstt|	|ksttt|	|	dks tt 
|}|jd	|  ks>t|| t|ksTtt|dksftt|| }t|| }||| ||| qd S )Nr6   r   c                    s<   d  }t  |}| D ]}||}||kstdqd S )N皙?z=An index is not drawn with chance corresponding to even draws)r   ZbinomZpmfrK   )Z
idx_countsr.   	thresholdbfcountZprobrl   rf   rg   assert_counts_are_ok>  s    
z@test_stratified_shuffle_split_even.<locals>.assert_counts_are_ok)r<      r,   r   r2         ?rX   r1   r@   r  r  )rF   rG   r   rO   r   rK   r"   r   ry   rz   r	  r1   r
  )Zn_foldsr  rW   r[   r   Ztrain_countsZtest_countsn_splits_actualrc   rd   counteridsidZn_trainZn_testZgroup_countsZ	ex_test_pZ
ex_train_prf   rl   rg   "test_stratified_shuffle_split_even8  sF    
  

  



r  c                  C   s|   ddddgd ddgd  } t | }tdddd}t|j|| d	\}}tt ||g  tt ||t t	|  d S )
Nr   r2   r,   r7   r8   r6   r/   r  r  )
rF   r   r   r   rO   r)   r   union1dr   r   )rZ   rY   r  rc   rd   rf   rf   rg   4test_stratified_shuffle_split_overlap_train_test_bugl  s    
r  c                  C   s  t ddgddgddgddggt ddgddgddgddggfD ]} t | }tdddd}t|j|| d\}}| | }| | }tt ||g  tt ||t 	t
|  t | d d df }|t |d d df kst|t |d d df ksDtqDd S )Nr   r2   r/   r  r  )rF   rG   r   r   r   rO   r)   r   r  r   r   r   rK   )rZ   rY   r  rc   rd   y_trainy_testexpected_ratiorf   rf   rg   (test_stratified_shuffle_split_multilabel}  s      
r  c            
      C   s   dddgdgd  dddg } dddgdgd  dddg }t | gd |gd  }t |}tdddd}t|j||d\}}|| }|| }t |d d d	f }	|	t |d d d	f kst|	t |d d d	f kstd S )
Nr2   r   r   r5   r   r/   r  r  r8   )rF   rG   r   r   r   rO   r   rK   )
Zrow_with_many_zerosZrow_with_many_onesrZ   rY   r  rc   rd   r  r  r  rf   rf   rg   4test_stratified_shuffle_split_multilabel_many_labels  s    
r  c            	      C   s   t dd} g }g }ttdddtD ](\}\}}|| || || |< q*t| }tt 	| |
 ksvtt|  \}}t|| t|| d S )Nr5   g      r6   Tr   )rF   fullrH   r   rO   rY   r   r   r   r	  rJ   rK   rI   r)   )	foldsZkf_trainZkf_testrC   Z	train_indZtest_indr`   Zps_trainZps_testrf   rf   rg   %test_predefinedsplit_with_kfold_split  s    "



r"  c                  C   sT  t D ]H} tt|  }}d}d}t||dd}t| |j||| d|ksRtt| }t	| }|j
||| dD ]\}}	t|| }
t||	 }tt|| |rttt||	 |
rt|| j||	 j |jksttt||	g  tt|t|t|  dks$ttt|
td| t|  dksvtqvqd S )Nr<   gUUUUUU?r   r   r   r2   r  )test_groupsrF   r   r   r   rS   rJ   rK   r	  rP   rO   anyisinrj   r)   r   absround)groups_irY   rZ   rX   r1   ZsloZl_uniquelrc   rd   Zl_train_uniqueZl_test_uniquerf   rf   rg   test_group_shuffle_split  s&    


&"r*  c               	   C   s  t  } tdd}tdd}t| dks*tt|dks:tt|dksJtttdddks`tt| df|df|dffD ]\}\}}ttD ]\}}tt|}|dkr|n||d  d }	t	t| }
}|j
|
||d	|	kstt|}|j|
||d	D ]d\}}tt|| ||  g  t|t| t|ksHtt|| jd
 st|qqqz| 
d d dddddgdkst| j
ddddgd	dkst|
d d tddkst|j
tdd	dksttt | 
d d dtjdg W 5 Q R X tt |
d d dtjdg W 5 Q R X d}tjt|d | 
d d d  W 5 Q R X tjt|d |
d d d  W 5 Q R X d S )Nr2   r3   r,   rA   zLeavePGroupsOut(n_groups=1)rB   r7   zLeavePGroupsOut(n_groups=3)r   r   abcr  皙?g333333?r8   r<   g        z*The 'groups' parameter should not be None.rD   )r   r   rS   rK   rH   r#  r   rF   r	  r   rJ   rP   rO   r)   r   tolistshaper   rT   rU   rV   naninf)ZlogoZlpgo_1Zlpgo_2jrb   Zp_groups_outrC   r(  r4   rX   rY   rZ   Z
groups_arrrc   rd   re   rf   rf   rg   test_leave_one_p_group_out  sD    

&
 &"  r4  c               
   C   s  t ddddddddg} t t| }t j| dd}t j|| d}t j|| d}tddj|| d}tddj|| d}d|d d < ||f||ffD ]8\}}t||D ]$\\}	}
\}}t|	| t|
| qqdtddj	||| d	kst
dt j	||| d	kst
d S )
Nr   r2   r,   T)copyr   r3   r7   )rZ   r[   )rF   rG   r   r   r   rO   r   rI   r)   rJ   rK   )r[   rY   Zgroups_changingr^   Zlolo_changingZlploZlplo_changingZlloZllo_changingrc   rd   Z
train_chanZ	test_chanrf   rf   rg   $test_leave_group_out_changing_groups  s    
r6  c                  C   s   t ddddddg} t t| }tt j|| d}ddddgddgfddddgddgfddddgddgfg}|D ](\}}t|\}}t|| t|| qvd S )Nr,   r   r2   r   r8   r6   r7   )	rF   rG   r   r   iterr   rO   r   r)   )r[   rY   r   Zexpected_indicesZexpected_trainZexpected_testrc   rd   rf   rf   rg   %test_leave_group_out_order_dependence0  s    
r8  c               	   C   sL  t d }  }}td}tjt|d tt 	| || W 5 Q R X t d }  }}td| d}tjt|d tt 	| || W 5 Q R X t d }  }}td| d}tjt|d tt
d	d
	| || W 5 Q R X t d	 }  }}td| d}tjt|d tt
d	d
	| || W 5 Q R X d S )Nr   zFound array with 0 sample(s)rD   r2   z:The groups parameter contains fewer than 2 unique groups (z'). LeaveOneGroupOut expects at least 2.z^The groups parameter contains fewer than (or equal to) n_groups (3) numbers of unique groups (zR). LeavePGroupsOut expects that at least n_groups + 1 (4) unique groups be presentr7   r3   )rF   r   reescaperT   rU   rV   r   r   rO   r   r   )rY   rZ   r[   re   rf   rf   rg   :test_leave_one_p_group_out_error_on_fewer_number_of_groupsD  s,    


"
r;  c               
   C   sR   t tfD ]D} tt | dd W 5 Q R X tt | dd W 5 Q R X qd S )Nr   )	n_repeatsr   )r   r   rT   rU   rV   r   rf   rf   rg   test_repeated_cv_value_errorsg  s
    r=  
RepeatedCVc                 C   s6   d\}}| ||d}d |jj}|t|ks2td S )N)r,   r<   rX   r<  z.{}(n_repeats=6, n_splits=2, random_state=None))rr   	__class____name__rS   rK   )r>  rX   r<  Zrepeated_cvZrepeated_cv_reprrf   rf   rg   test_repeated_cv_reprq  s    rB  c               
   C   s  ddgddgddgddgd	d
gg} d}t dd|d}tdD ]}|| }t|\}}t|ddg t|dddg t|\}}t|dddg t|ddg t|\}}t|ddg t|dddg t|\}}t|dddg t|ddg tt t| W 5 Q R X q<d S )Nr2   r,   r7   r8   r6   r<   r=   r>   r   r5   i{icrX   r<  r@   r   )r   r|   rO   r   r)   rT   rU   StopIteration)rY   r@   rkfr   r   rc   rd   rf   rf   rg   &test_repeated_kfold_determinstic_split{  s&    "
rF  c                  C   s0   d} d}t | |d}| | }|| ks,td S Nr7   r8   r?  )r   rJ   rK   )rX   r<  rE  r   rf   rf   rg   $test_get_n_splits_for_repeated_kfold  s
    rH  c                  C   s0   d} d}t | |d}| | }|| ks,td S rG  )r   rJ   rK   )rX   r<  rskfr   rf   rf   rg   /test_get_n_splits_for_repeated_stratified_kfold  s
    rJ  c               
   C   s&  ddgddgddgddgd	d
gg} dddddg}d}t dd|d}tdD ]}|| |}t|\}}t|ddg t|dddg t|\}}t|dddg t|ddg t|\}}t|ddg t|dddg t|\}}t|dddg t|ddg tt t| W 5 Q R X qJd S )Nr2   r,   r7   r8   r6   r<   r=   r>   r   r5   r   iqsrC  )r   r|   rO   r   r)   rT   rU   rD  )rY   rZ   r@   rI  r   r   rc   rd   rf   rf   rg   1test_repeated_stratified_kfold_determinstic_split  s(    "rK  c                	   C   s  t tt t jtttddd t jtttdddd t jtttdtdtdd t jtttddd t jtttddd	d t jtttddd
 t tttdtd t jtttdddd t jtdd ttdddd W 5 Q R X d S )Nr7   r.  r   r   r  Z
wrong_typer0   r,   r8   )Zsome_argument*   r5   FT)r   stratifyzrtrain_size=11 should be either positive and smaller than the number of samples 10 or a float in the \(0, 1\) rangerD   r   r2   )r   r1   )rT   rU   rV   r    r|   rF   Zfloat32r   rf   rf   rf   rg   test_train_test_split_errors  s(    rN  )Nr=   r7   c                 C   s4   t t| d\}}t||ks tt||ks0td S )Nr   )r    rY   r   rK   )r   r   r   r   r   rf   rf   rg   'test_train_test_split_default_test_size  s    rO  c                  C   sD  t dd} t| }t d}t| |d dd}|\}}}}t|t|ksRtt|d d df |d  t|d d df |d  t| || }|\}}}}	}}t	|t
stt	|t
stt dddd	d
}
t dddd}t|
|}|d jdks
t|d jdkst|d
 jdks2t|d	 jdksFtt ddddd
d
d
d
g}td
ddddgd
dd
ddgD ]p\}}t|||dd\}}t||kstt|t| t|kstt |dkt |d
kks~tq~t d}dD ]@}t|d|d\}}t|ddg t|ddd
d	ddddg qd S )Nr   r5   r5   r5   r/   r  r   r   r6   r7   r,   i  r=   r   )r=   r6   r7   r,   r2   )r7   r6   r7   r,   )r=   r=   r   )r7   r=   r   r8   r   r   r<   )r1   rM  r@   )r,   r   F)r   r1   r>   r   )rF   r   rq   r   r    r   rK   r)   r/  
isinstancerN   r0  rG   rI   r   )rY   X_srZ   rO   r   r   r  r  Z	X_s_trainZX_s_testZX_4dZy_3dr1   Zexp_test_sizerc   rd   rf   rf   rg   test_train_test_split  sF    

&   
&
rS  c                  C   sb   d} t | }|d|  k}t|||dd}|\}}}}|j|j | ksJt|j|j | ks^tdS )zCheck for integer overflow on 32-bit platforms.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20774
    i Gz?r   )rM  r   N)rF   r   r    rj   rK   )Z
big_numberrY   rZ   rO   r   r   r  r  rf   rf   rg   $test_train_test_split_32bit_overflow  s    	
rU  c                  C   st   t g} zddlm} | | W n tk
r4   Y nX | D ]4}|t}t|\}}t||s`tt||s:tq:d S )Nr   )	DataFrame)	r&   ZpandasrV  r   ImportErrorrY   r    rQ  rK   )typesrV  InputFeatureTypeX_dfr   r   rf   rf   rg   test_train_test_split_pandas4  s    r[  c                  C   sh   t dd} tttg}|D ]D}|| }t|\}}t|rH|jdksLt	t|r^|jdkst	qd S )Nr   rP  Zcsr)
rF   r   rq   r   r   r   r    r	   rr   rK   )rY   Zsparse_typesrY  rR  r   r   rf   rf   rg   test_train_test_split_sparseF  s    
r\  c                  C   s@   t t} t| \}}t|t s"tt|t s0tt| \}}d S rx   )r&   rY   r    rQ  rK   )rZ  r   r   ZX_train_arrZ
X_test_arrrf   rf   rg   !test_train_test_split_mock_pandasR  s
    r]  c                  C   s   t d} dgd dgd  }t t dt df}| }dD ]}t| ||rV|nd dd\}}}}t| ||rv|nd dd\}	}
}}t| ||r|nd dd\}}}}t j||	 t j|| t j|| t j|| qDd S )	Nr=   r9   r8   r   r7   r   r   )rM  r@   )rF   r   hstackr   r/  r    rL   rM   )rY   r   r   y3rM  ZX_train1ZX_test1Zy_train1Zy_test1ZX_train2ZX_test2Zy_train2Zy_test2ZX_train3ZX_test3Zy_train3Zy_test3rf   rf   rg    test_train_test_split_list_input[  s6    
  
   
   
 r`  ztest_size, train_size)r   N)r  N)r   gffffff?)Ny              ?)r   N)r5   N)r>   r7   c              	   C   s0   t t tt| |dt W 5 Q R X d S )Nr  )rT   rU   rV   r   r   rO   rY   r  rf   rf   rg   test_shufflesplit_errorss  s    ra  c                  C   s8   t dd} tdd | tD dd | tD  d S )N   r?   c                 S   s   g | ]\}}|qS rf   rf   )r   r+  r,  rf   rf   rg   r     s     z2test_shufflesplit_reproducible.<locals>.<listcomp>)r   r)   rO   rY   )r_   rf   rf   rg   test_shufflesplit_reproducible|  s    
rc  c                  C   s   t ddd} td}dgd dgd  }ttdtdf}| }tjt| 	||t| 	|| tjt| 	||t| 	|| d S )	Nr,   rL  r   r=   r9   r8   r   r7   )
r   rF   r   r^  r   r/  rL   rM   rN   rO   )r  rY   r   r   r_  rf   rf   rg   &test_stratifiedshufflesplit_list_input  s    
&rd  c                  C   sX   t jdt jddd} t j| dd d f< t ddg| jd d }t| |dd	d
 d S )N   rQ   r5   rk   r,   r   r2   r   rL  r   )rF   r   Zfloat64rq   r1  repeatr0  r    r  rf   rf   rg    test_train_test_split_allow_nans  s    rh  c                  C   s^  t d} tddd}t jttd| t||  t dddddddddg	}td|dd}t jtt	d| |t|| | t dddddddddg	}td|dd}t jtt	d| |t|| | |
d	d}td|dd}t jtt	d| |t|| | t tt	d| |d ttd| |d krdtt d
} t ddddgddddgddddgddddgddddgg}td|dd}t jttd| t||  t ddgddgddgddgddgg}td|dd}t jttd| t||  tt tdd W 5 Q R X d S )Nr   r7   F)
classifierr   r2   Tr,   rk   r6   r^   r   )rF   r   r   rL   rM   rN   r   rO   rG   r   rq   allr   rK   rT   rU   rV   )rY   rb   Zy_binaryZy_multiclassZy_multiclass_2dru   Zy_multioutputrf   rf   rg   test_check_cv  sJ    
&  

4&(&rk  c                  C   s   t  tt} t| }tjt|ttt|tt t dddtt}t|}tjt|ttt|tt z.d}tjt|ttt|tt W n t	k
r   d}Y nX |rt	dd S )NTr   r   FzVIf the splits are randomized, successive calls to split should yield different results)
r   rO   rY   rZ   r   rF   rL   rM   rN   rK   )Zkf_iterZkf_iter_wrappedZkf_randomized_iterZkf_randomized_iter_wrappedZsplits_are_equalrf   rf   rg   test_cv_iterable_wrapper  s.     
rl  c              (   C   s  t jd}d}d}d}t | }}d| }|d||}|| }	tt | t |}
| |d}t|	|||D ]\}\}}||
|< qxt|
t|kst
t |
D ] }|tt|
|k|	 kst
qt |D ]"}tt |
||k dkst
qt j|td}|	|||D ],\}}tt || || dkst
qt d	d
ddd
dddddddddd
ddddddddddddddd d!ddd"d#d$d%d&g&}tt |}t|}d}d| }|| }	t | }}t |}
t|	|||D ]\}\}}||
|< qt|
t|kst
t |
D ]$}|tt|
|k|	 ks(t
q(t D td't t |D ]&}tt |
||k dksnt
qnW 5 Q R X t j|td}|	|||D ],\}}tt || || dkst
qt|	||| }t|	||||D ]&\\}}\}}t|| t|| qt dddd(d(g}t t| }}tjtd)d* ttd+d	||| W 5 Q R X d S ),Nr   r   r   r6   r  rl   r2   rf  ZAlbertZJeanZBertrandZMichelZFrancisZRobertZRachelZLoisZMichelleZBernardZMarionZLauraZFranckZJohnZGaelZAnnaZAlixZDavidZTonyZAbelZBeckyZMadmoodZCaryZMaryZ	AlexandreZBarackZAbdoulZRashaXiZSilviar   r,   z%Cannot have number of splits.*greaterrD   r7   )rF   rn   ro   r   rp   r   r	  r   rH   rO   rK   r&  r   rP   objectr   rG   r   r   r   FutureWarningrN   r/  rI   r)   rT   rU   rV   r   r   )r   rt   r4   rW   rX   rY   rZ   Z	tolerancer[   Zideal_n_groups_per_foldr!  ZlkfrC   r   rd   grouprc   Zcv_iterZtrain1r   train2Ztest2rf   rf   rg   test_group_kfold  s    


 &+
"
.&$
rr  c               	   C   sn  ddgddgddgddgd	d
gddgddgg} t jtdd ttdd|  W 5 Q R X td}|| d d }t|\}}t|ddg t|ddg t|\}}t|ddddg t|ddg td| }t|\}}t|dddg t|ddg t|\}}t|dddddg t|ddg td| }tt|}||	 ks\t
|dksjt
d S )Nr2   r,   r7   r8   r6   r<   r=   r>   r   r5   r   r      r   z$Cannot have number of folds.*greaterrD   rl   rk   r   )rT   rU   rV   r   r   rO   r)   r   rN   rJ   rK   )rY   Ztscvr   rc   rd   r  rf   rf   rg   test_time_series_cvb  s,    .rt  c                 C   s^   t | |D ]N\\}}\}}t|| t||ks4ttt|| d}t|||d   q
d S )Nr   )rI   r)   r   rK   r   )r   check_splitsmax_train_sizerc   rd   Zcheck_trainZ
check_testZsuffix_startrf   rf   rg   !_check_time_series_max_train_size  s
    
rw  c                  C   s~   t d} tdd| }tddd| }t||dd tddd| }t||dd tddd| }t||dd d S )N)r<   r2   r7   rl   )rX   rv  )rv  r,   r6   )rF   r   r   rO   rw  )rY   r   ru  rf   rf   rg   test_time_series_max_train_size  s    
rx  c               	   C   sJ  t d} tddd| }t|\}}t|dg t|dddg t|\}}t|ddddg t|ddd	g t|\}}t|ddddddd	g t|d
ddg tdddd| }t|\}}t|ddddg t|d	d
g t|\}}t|ddd	d
g t|ddg tjtdd  tddd| }t| W 5 Q R X d S )Nr5   r2   r7   )rX   r1   r   r2   r,   r8   r6   r<   r=   r>   r   )rX   r1   rv  zToo many splits.*with test_sizerD   	rF   r   r   rO   r   r)   rT   rU   rV   rY   r   rc   rd   rf   rf   rg   test_time_series_test_size  s*    
r|  c               	   C   s  t d} tddd| }t|\}}t|ddg t|dddg t|\}}t|dddd	dg t|d
ddg td	ddd| }t|\}}t|ddg t|ddg t|\}}t|dd	g t|dd
g t|\}}t|ddg t|ddg tddddd| }t|\}}t|dddd	g t|dd
g t|\}}t|dd	ddg t|ddg tddd	d| }t|\}}t|ddg t|dddg t|\}}t|dddd	dg t|d
ddg tjtdd  tddd| }t| W 5 Q R X d S )Nry  r,   )rX   gapr   r2   r8   r6   r<   r7   r=   r>   r   )rX   r}  rv  )rX   r}  rv  r1   )rX   r}  r1   zToo many splits.*and gaprD   rz  r{  rf   rf   rg   test_time_series_gap  sF    
r~  c               
   C   s   t jd} tdddd\}}| ddd}t tddt tddt t	 t
dddg}t|dD ]:\}}tt d	d
dgi|dd}t|||||d|id qdd S )Nr   r   r,   )rW   Z	n_classesr@   r6   rl   r7   )rX   r@   ZstrategyZ
stratifiedZmost_frequentraise)Z
param_gridrb   Zerror_scorer[   )rY   rZ   r[   rb   Z
fit_params)rF   rn   ro   r   rp   r   r   r   r   r   r   r   r   r   r   )rt   rY   rZ   r[   ZcvsZinner_cvZouter_cvgsrf   rf   rg   test_nested_cv  s4    


     r  c                  C   s(   G dd d} t | dddks$td S )Nc                   @   s   e Zd ZdddZdd ZdS )z%test_build_repr.<locals>.MockSplitterr   Nc                 S   s   || _ || _|| _d S rx   )r+  r,  r-  )selfr+  r,  r-  rf   rf   rg   __init__  s    z.test_build_repr.<locals>.MockSplitter.__init__c                 S   s   t | S rx   )r!   )r  rf   rf   rg   __repr__  s    z.test_build_repr.<locals>.MockSplitter.__repr__)r   N)rA  
__module____qualname__r  r  rf   rf   rf   rg   MockSplitter  s   
r  r6   r<   zMockSplitter(a=5, b=6, c=None))rS   rK   )r  rf   rf   rg   test_build_repr  s    	r  
CVSplitterc              	   C   sN   | dd}dggdg }}t jtdd t|j||dgd W 5 Q R X d S )NrT  r0   r2   r   [With n_samples=1, test_size=0.99 and train_size=None, the resulting train set will be emptyrD   r   )rT   rU   rV   r   rO   )r  rb   rY   rZ   rf   rf   rg   !test_shuffle_split_empty_trainset$  s    
r  c               	   C   sj   dgg\} t jtdd t| dd W 5 Q R X dgdgdgg} t jtdd t| dd W 5 Q R X d S )Nr2   r  rD   rT  r0   z[With n_samples=3, test_size=0.67 and train_size=None, the resulting train set will be emptygq=
ףp?)rT   rU   rV   r    r   rf   rf   rg   $test_train_test_split_empty_trainset4  s    
r  c               	   C   sD   t  } dggdg }}tjtdd t| || W 5 Q R X d S )Nr2   r   z+Cannot perform LeaveOneOut with n_samples=1rD   )r   rT   rU   rV   r   rO   rb   rY   rZ   rf   rf   rg   !test_leave_one_out_empty_trainsetJ  s    r  c               	   C   sV   t dd} dgdggddg }}tjtdd t| j||ddgd W 5 Q R X d S )	Nr,   r-   r2   r   r7   z6p=2 must be strictly less than the number of samples=2rD   r   )r   rT   rU   rV   r   rO   r  rf   rf   rg   test_leave_p_out_empty_trainsetR  s    
 r  Klassc              	   C   s,   t jtdd | dddd W 5 Q R X d S )Nz$has no effect since shuffle is FalserD   r7   Fr   r   )rT   rU   rV   )r  rf   rf   rg   test_random_state_shuffle_false\  s    r  zcv, expected{   r   r?   c                 C   s   t | |kstd S rx   )r#   rK   )rb   r   rf   rf   rg   test_yields_constant_splitsc  s    %r  rb   c                 C   s   g | ]}t |qS rf   rs   r   rb   rf   rf   rg   r     s     r   )r  c                 C   sZ   t | dst|  }| tkr4|jjd dksHtn| tkrH|jjrHtt|dgd dS )z>Check get_metadata_routing returns the correct MetadataRouter.get_metadata_routingr[   TrO   )excludeN)hasattrrK   r  GROUP_SPLITTERSrO   requestsNO_GROUP_SPLITTERSr%   )rb   metadatarf   rf   rg   "test_splitter_get_metadata_routing  s    r  c                 C   s   g | ]}t |qS rf   r  r  rf   rf   rg   r     s     c                 C   s2   | t krt| ds.tn| tkr.t| dr.tdS )zJCheck set_split_request is defined for group splitters and not for others.Zset_split_requestN)r  r  rK   r  r   rf   rf   rg   test_splitter_set_split_request  s    r  )N)__doc__r9  r   	itertoolsr   r   r   numpyrF   rT   Zscipyr   Zscipy.sparser   r   r   r	   Zscipy.specialr
   Zsklearn.datasetsr   r   Zsklearn.dummyr   Zsklearn.model_selectionr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    Zsklearn.model_selection._splitr!   r"   r#   Zsklearn.svmr$   Z#sklearn.tests.test_metadata_routingr%   Zsklearn.utils._mockingr&   Zsklearn.utils._testingr'   r(   r)   r*   Zsklearn.utils.validationr+   r  r  ZALL_SPLITTERSr   rY   r   rZ   eyeZP_sparserG   r#  r   rh   rw   r}   r   r   r   r   r   markZparametrizer   r   r   r   r   r   r   r   r   r   rP   r   r   r   r   r  r  r  r  r  r  r  r  r"  r*  r4  r6  r8  r;  r=  rB  rF  rH  rJ  rK  rN  rO  rS  rU  r[  r\  r]  r`  ra  rc  rd  rh  rk  rl  rr  rt  rw  rx  r|  r~  r  r  r  r  r  r  r  rn   ro   r  r  r  rf   rf   rf   rg   <module>   s  T
  &(
U'
I+


,  

  

%4%7#
	
	 
	/
	
.!
{%#9 


$

