U
    “Ç-e  ã                   @   sÐ   d dl Zd dlmZmZmZ d dlmZmZm	Z	 d dl
mZ dd„ Zdd„ Zd	d
„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd „ ZG d!d"„ d"e	ƒZed#krÌe ¡  dS )$é    N)ÚcudaÚint32Úfloat32)Úskip_on_cudasimÚunittestÚCUDATestCase)ÚENABLE_CUDASIMc                 C   s   t  d¡}t  ¡  || |< d S ©Né   )r   ÚgridÚsyncthreads©ÚaryÚi© r   úb/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/numba/cuda/tests/cudapy/test_sync.pyÚuseless_syncthreads   s    
r   c                 C   s   t  d¡}t  ¡  || |< d S r	   ©r   r   Úsyncwarpr   r   r   r   Úuseless_syncwarp   s    
r   c                 C   s    t  d¡}t  d¡ || |< d S )Nr
   éÿÿ  r   r   r   r   r   Úuseless_syncwarp_with_mask   s    

r   c                 C   sð   t j dt¡}t  d¡}|||< t  ¡  |dk rR|| ||d   ||< t  d¡ |dk r||| ||d   ||< t  d¡ |dk r¦|| ||d   ||< t  d¡ |d	k rÐ|| ||d	   ||< t  d
¡ |dkrì|d |d  | d< d S )Né    r
   é   r   é   éÿ   é   é   é   é   r   )r   ÚsharedÚarrayr   r   r   )ÚresÚsmr   r   r   r   Úcoop_syncwarp   s$    




r$   c                 C   sR   d}t j |t¡}t  d¡}|dkr:t|ƒD ]}|||< q,t  ¡  || | |< d S )Néd   r
   r   )r   r    r!   r   r   Úranger   )r   ÚNr#   r   Újr   r   r   Úsimple_smem4   s    

r)   c                 C   sT   t  d¡\}}t j dt¡}|d |d  |||f< t  ¡  |||f | ||f< d S )Nr   ©é
   é   r
   ©r   r   r    r!   r   r   )r   r   r(   r#   r   r   r   Úcoop_smem2d?   s
    r.   c                 C   s<   t  d¡}t j dt¡}|d ||< t  ¡  || | |< d S )Nr
   r   r   r-   )r   r   r#   r   r   r   Údyn_shared_memoryG   s
    
r/   c                 C   s,   | d  d7  < t  ¡  | d  d7  < d S ©Nr   é{   iA  )r   Zthreadfence©r   r   r   r   Úuse_threadfenceO   s    r3   c                 C   s,   | d  d7  < t  ¡  | d  d7  < d S r0   )r   Zthreadfence_blockr2   r   r   r   Úuse_threadfence_blockU   s    r4   c                 C   s,   | d  d7  < t  ¡  | d  d7  < d S r0   )r   Zthreadfence_systemr2   r   r   r   Úuse_threadfence_system[   s    r5   c                 C   s    t  d¡}t  | | ¡||< d S r	   )r   r   Zsyncthreads_count©Úary_inÚary_outr   r   r   r   Úuse_syncthreads_counta   s    
r9   c                 C   s    t  d¡}t  | | ¡||< d S r	   )r   r   Zsyncthreads_andr6   r   r   r   Úuse_syncthreads_andf   s    
r:   c                 C   s    t  d¡}t  | | ¡||< d S r	   )r   r   Zsyncthreads_orr6   r   r   r   Úuse_syncthreads_ork   s    
r;   c                 C   s   t rdS t ¡ j| kS d S )NT)r   r   Zget_current_deviceZcompute_capability)Úccr   r   r   Ú_safe_cc_checkp   s    r=   c                   @   sü   e Zd Zdd„ Zdd„ Zedƒdd„ ƒZedƒe e	dƒd	¡d
d„ ƒƒZ
edƒe e	dƒd	¡dd„ ƒƒZdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zd d!„ Zd"d#„ Zd$d%„ Zd&d'„ Zd(d)„ Zd*d+„ Zd,d-„ Zd.d/„ Zd0d1„ Zd2S )3ÚTestCudaSyncc                 C   sT   t  d¡|ƒ}d}tj|tjd}tj|tjd}|d|f |ƒ tj ||¡ d S )Núvoid(int32[::1])r+   ©Zdtyper
   )r   ÚjitÚnpÚemptyr   ÚarangeÚtestingÚassert_equal)ÚselfZkernelÚcompiledÚnelemr   Úexpr   r   r   Ú_test_uselessx   s    zTestCudaSync._test_uselessc                 C   s   |   t¡ d S ©N)rK   r   ©rG   r   r   r   Útest_useless_syncthreads€   s    z%TestCudaSync.test_useless_syncthreadsz#syncwarp not implemented on cudasimc                 C   s   |   t¡ d S rL   )rK   r   rM   r   r   r   Útest_useless_syncwarpƒ   s    z"TestCudaSync.test_useless_syncwarp)é   r   z'Partial masks require CC 7.0 or greaterc                 C   s   |   t¡ d S rL   )rK   r   rM   r   r   r   Útest_useless_syncwarp_with_mask‡   s    z,TestCudaSync.test_useless_syncwarp_with_maskc                 C   sP   d}d}d}t  d¡tƒ}tjdtjd}|||f |ƒ tj ||d ¡ d S )Nið  r   r
   r?   r@   r   )r   rA   r$   rB   Úzerosr   rE   rF   )rG   ÚexpectedZnthreadsZnblocksrH   r"   r   r   r   Útest_coop_syncwarp   s    zTestCudaSync.test_coop_syncwarpc              	   C   sV   t  d¡tƒ}d}tj|tjd}|d|f |ƒ |  t |tj|tjdk¡¡ d S )Nr?   r%   r@   r
   )	r   rA   r)   rB   rC   r   Ú
assertTrueÚallrD   )rG   rH   rI   r   r   r   r   Útest_simple_smemœ   s
    zTestCudaSync.test_simple_smemc                 C   s’   t  d¡tƒ}d}tj|tjd}|d|f |ƒ t |¡}t|jd ƒD ]0}t|jd ƒD ]}|d |d  |||f< q\qJ|  	t 
||¡¡ d S )Nzvoid(float32[:,::1])r*   r@   r
   r   )r   rA   r.   rB   rC   r   Z
empty_liker&   ÚshaperU   Zallclose)rG   rH   rX   r   rJ   r   r(   r   r   r   Útest_coop_smem2d£   s    
zTestCudaSync.test_coop_smem2dc              
   C   sf   t  d¡tƒ}d}tj|tjd}|d|d|jd f |ƒ |  t |dtj	|jtj
d k¡¡ d S )Nzvoid(float32[::1])é2   r@   r
   r   r   r   )r   rA   r/   rB   rC   r   ÚsizerU   rV   rD   r   )rG   rH   rX   r   r   r   r   Útest_dyn_shared_memory®   s
    z#TestCudaSync.test_dyn_shared_memoryc                 C   sb   t d d … f}t |¡tƒ}tjdtj d}|d |ƒ |  d|d ¡ ts^|  d| 	|¡¡ d S )Nr+   r@   ©r
   r
   é¼  r   z
membar.gl;)
r   r   rA   r3   rB   rR   ÚassertEqualr   ÚassertInÚinspect_asm©rG   ÚsigrH   r   r   r   r   Útest_threadfence_codegenµ   s    z%TestCudaSync.test_threadfence_codegenc                 C   sb   t d d … f}t |¡tƒ}tjdtj d}|d |ƒ |  d|d ¡ ts^|  d| 	|¡¡ d S )Nr+   r@   r]   r^   r   zmembar.cta;)
r   r   rA   r4   rB   rR   r_   r   r`   ra   rb   r   r   r   Útest_threadfence_block_codegen¿   s    z+TestCudaSync.test_threadfence_block_codegenc                 C   sb   t d d … f}t |¡tƒ}tjdtj d}|d |ƒ |  d|d ¡ ts^|  d| 	|¡¡ d S )Nr+   r@   r]   r^   r   zmembar.sys;)
r   r   rA   r5   rB   rR   r_   r   r`   ra   rb   r   r   r   Útest_threadfence_system_codegenÉ   s    z,TestCudaSync.test_threadfence_system_codegenc                 C   s^   t  t¡}tjd|d}tjdtjd}d|d< d|d< |d ||ƒ |  t |dk¡¡ d S )NéH   r@   r   é   é*   )r
   rg   éF   )	r   rA   r9   rB   ÚonesrR   r   rU   rV   )rG   Úin_dtyperH   r7   r8   r   r   r   Ú_test_syncthreads_countÓ   s    
z$TestCudaSync._test_syncthreads_countc                 C   s   |   tj¡ d S rL   )rm   rB   r   rM   r   r   r   Útest_syncthreads_countÜ   s    z#TestCudaSync.test_syncthreads_countc                 C   s   |   tj¡ d S rL   )rm   rB   Úint16rM   r   r   r   Útest_syncthreads_count_upcastß   s    z*TestCudaSync.test_syncthreads_count_upcastc                 C   s   |   tj¡ d S rL   )rm   rB   Úint64rM   r   r   r   Útest_syncthreads_count_downcastâ   s    z,TestCudaSync.test_syncthreads_count_downcastc                 C   s„   t  t¡}d}tj||d}tj|tjd}|d|f ||ƒ |  t |dk¡¡ d|d< |d|f ||ƒ |  t |dk¡¡ d S ©Nr%   r@   r
   r   rh   )	r   rA   r:   rB   rk   rR   r   rU   rV   ©rG   rl   rH   rI   r7   r8   r   r   r   Ú_test_syncthreads_andå   s    
z"TestCudaSync._test_syncthreads_andc                 C   s   |   tj¡ d S rL   )ru   rB   r   rM   r   r   r   Útest_syncthreads_andð   s    z!TestCudaSync.test_syncthreads_andc                 C   s   |   tj¡ d S rL   )ru   rB   ro   rM   r   r   r   Útest_syncthreads_and_upcastó   s    z(TestCudaSync.test_syncthreads_and_upcastc                 C   s   |   tj¡ d S rL   )ru   rB   rq   rM   r   r   r   Útest_syncthreads_and_downcastö   s    z*TestCudaSync.test_syncthreads_and_downcastc                 C   s„   t  t¡}d}tj||d}tj|tjd}|d|f ||ƒ |  t |dk¡¡ d|d< |d|f ||ƒ |  t |dk¡¡ d S rs   )r   rA   r;   rB   rR   r   rU   rV   rt   r   r   r   Ú_test_syncthreads_orù   s    
z!TestCudaSync._test_syncthreads_orc                 C   s   |   tj¡ d S rL   )ry   rB   r   rM   r   r   r   Útest_syncthreads_or  s    z TestCudaSync.test_syncthreads_orc                 C   s   |   tj¡ d S rL   )ry   rB   ro   rM   r   r   r   Útest_syncthreads_or_upcast  s    z'TestCudaSync.test_syncthreads_or_upcastc                 C   s   |   tj¡ d S rL   )ry   rB   rq   rM   r   r   r   Útest_syncthreads_or_downcast
  s    z)TestCudaSync.test_syncthreads_or_downcastN)Ú__name__Ú
__module__Ú__qualname__rK   rN   r   rO   r   Z
skipUnlessr=   rQ   rT   rW   rY   r\   rd   re   rf   rm   rn   rp   rr   ru   rv   rw   rx   ry   rz   r{   r|   r   r   r   r   r>   w   s@   

ÿ
ÿ


	r>   Ú__main__)ÚnumpyrB   Znumbar   r   r   Znumba.cuda.testingr   r   r   Znumba.core.configr   r   r   r   r$   r)   r.   r/   r3   r4   r5   r9   r:   r;   r=   r>   r}   Úmainr   r   r   r   Ú<module>   s*    