U
    -e҄                     @   s  d dl Z d dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZ dd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Z d#d$ Z!d%d& Z"d'd( Z#d)d* Z$d+d, Z%d-d. Z&d/d0 Z'd1d2 Z(d3d4 Z)d5d6 Z*d7d8 Z+d9d: Z,d;d< Z-d=d> Z.d?d@ Z/dAdB Z0ej1dCdDdEdF Z2ej1dCdDdGdH Z3dIdJ Z4dKdL Z5dMdN Z6dOdP Z7dQdR Z8dSdT Z9dUdV Z:dWdX Z;dYdZ Z<d[d\ Z=d]d^ Z>d_d` Z?dadb Z@dcdd ZAdedf ZBdgdh ZCdidj ZDdkdl ZEdmdn ZFdodp ZGdqdr ZHdsdt ZIdudv ZJdwdx ZKdydz ZLd{d| ZMd}d~ ZNdd ZOdd ZPdd ZQdd ZRdd ZSdd ZTdd ZUG dd deZVeWdkreX  dS )    N)cudaint64)compile_ptx)TypingError)f2)unittestCUDATestCaseskip_on_cudasimskip_unless_cc_53c                 C   s   t jj}|| d< d S Nr   r   	threadIdxxaryi r   h/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/numba/cuda/tests/cudapy/test_intrinsics.pysimple_threadidx   s    r   c                 C   s   t jj}|| |< d S Nr   r   r   r   r   fill_threadidx   s    r   c                 C   s>   t jj}t jj}t jj}|d |d  |d  | |||f< d S N   )r   r   r   yz)r   r   jkr   r   r   fill3d_threadidx   s    r   c                 C   s   t d}|| |< d S r   r   gridr   r   r   r   simple_grid1d   s    
r    c                 C   s"   t d\}}|| | ||f< d S N   r   )r   r   r   r   r   r   simple_grid2d$   s    r#   c                 C   s(   t d}t d}|dkr$|| d< d S )Nr   r   r   r   gridsize)r   r   r   r   r   r   simple_gridsize1d)   s    

r&   c                 C   s@   t d\}}t d\}}|dkr<|dkr<|| d< || d< d S )Nr"   r   r   r$   )r   r   r   r   r   r   r   r   simple_gridsize2d0   s
    r'   c           	      C   sp   t d\}}t jjt jj }t jjt jj }| j\}}t|||D ]&}t|||D ]}|| | ||f< qTqDd S r!   )r   r   gridDimr   blockDimr   shaperange)	cstartXstartYgridXgridYheightwidthr   r   r   r   r   intrinsic_forloop_step8   s    
r3   c                 C   s   t || d< d S r   )r   Zpopcr   r,   r   r   r   simple_popcC   s    r5   c                 C   s   t |||| d< d S r   )r   fmar   abr,   r   r   r   
simple_fmaG   s    r:   c                 C   s   t j|d |d | d< d S r   r   fp16Zhaddr   r8   r9   r   r   r   simple_haddK   s    r>   c                 C   s   t j||| d< d S r   r;   r=   r   r   r   simple_hadd_scalarO   s    r?   c                 C   s$   t j|d |d |d | d< d S r   r   r<   Zhfmar7   r   r   r   simple_hfmaS   s    rA   c                 C   s   t j|||| d< d S r   r@   r7   r   r   r   simple_hfma_scalarW   s    rB   c                 C   s   t j|d |d | d< d S r   r   r<   Zhsubr=   r   r   r   simple_hsub[   s    rD   c                 C   s   t j||| d< d S r   rC   r=   r   r   r   simple_hsub_scalar_   s    rE   c                 C   s   t j|d |d | d< d S r   r   r<   Zhmulr=   r   r   r   simple_hmulc   s    rG   c                 C   s   t j||| d< d S r   rF   r=   r   r   r   simple_hmul_scalarg   s    rH   c                 C   s   t j||| d< d S r   )r   r<   hdivr=   r   r   r   simple_hdiv_scalark   s    rJ   c                 C   s:   t d}|| jk r6|| }|| }t j||| |< d S r   )r   r   sizer<   rI   )r   Zarray_aZarray_br   r8   r9   r   r   r   simple_hdiv_kernelo   s
    

rL   c                 C   s   t j|d | d< d S r   r   r<   Zhnegr   r8   r   r   r   simple_hnegw   s    rO   c                 C   s   t j|| d< d S r   rM   rN   r   r   r   simple_hneg_scalar{   s    rP   c                 C   s   t j|d | d< d S r   r   r<   ZhabsrN   r   r   r   simple_habs   s    rR   c                 C   s   t j|| d< d S r   rQ   rN   r   r   r   simple_habs_scalar   s    rS   c                 C   s   t j||| d< d S r   )r   r<   Zheqr=   r   r   r   simple_heq_scalar   s    rT   c                 C   s   t j||| d< d S r   )r   r<   hner=   r   r   r   simple_hne_scalar   s    rV   c                 C   s   t j||| d< d S r   )r   r<   hger=   r   r   r   simple_hge_scalar   s    rX   c                 C   s   t j||| d< d S r   )r   r<   Zhgtr=   r   r   r   simple_hgt_scalar   s    rY   c                 C   s   t j||| d< d S r   )r   r<   hler=   r   r   r   simple_hle_scalar   s    r[   c                 C   s   t j||| d< d S r   r   r<   hltr=   r   r   r   simple_hlt_scalar   s    r^   T)Zdevicec                 C   s   t j| |S r   r\   r   r   r   r   r   
hlt_func_1   s    r`   c                 C   s   t j| |S r   r\   r_   r   r   r   
hlt_func_2   s    ra   c                 C   s   t ||ot||| d< d S r   )r`   ra   rr8   r9   r,   r   r   r   test_multiple_hcmp_1   s    rd   c                 C   s    t ||otj||| d< d S r   )r`   r   r<   r]   rb   r   r   r   test_multiple_hcmp_2   s    re   c                 C   s    t ||otj||| d< d S r   )r`   r   r<   rW   rb   r   r   r   test_multiple_hcmp_3   s    rf   c                 C   s$   t j||ot j||| d< d S r   r\   rb   r   r   r   test_multiple_hcmp_4   s    rg   c                 C   s$   t j||ot j||| d< d S r   )r   r<   r]   rW   rb   r   r   r   test_multiple_hcmp_5   s    rh   c                 C   s   t j||| d< d S r   )r   r<   Zhmaxr=   r   r   r   simple_hmax_scalar   s    ri   c                 C   s   t j||| d< d S r   )r   r<   Zhminr=   r   r   r   simple_hmin_scalar   s    rj   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r   lenr<   Zhsinrc   r   r   r   r   r   simple_hsin   s    
rm   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r   rk   r<   Zhcosrl   r   r   r   simple_hcos   s    
rn   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r   rk   r<   Zhlogrl   r   r   r   simple_hlog   s    
ro   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r   rk   r<   Zhlog2rl   r   r   r   simple_hlog2   s    
rp   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r   rk   r<   Zhlog10rl   r   r   r   simple_hlog10   s    
rq   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r   rk   r<   Zhexprl   r   r   r   simple_hexp   s    
rr   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r   rk   r<   Zhexp2rl   r   r   r   simple_hexp2   s    
rs   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r   rk   r<   Zhsqrtrl   r   r   r   simple_hsqrt   s    
rt   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r   rk   r<   Zhrsqrtrl   r   r   r   simple_hrsqrt  s    
ru   c                 C   s   | d S )Ng      r   )r   dtyper   r   r   numpy_hrsqrt
  s    rw   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r   rk   r<   Zhceilrl   r   r   r   simple_hceil  s    
rx   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r   rk   r<   Zhfloorrl   r   r   r   simple_hfloor  s    
ry   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r   rk   r<   Zhrcprl   r   r   r   simple_hrcp  s    
rz   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r   rk   r<   Zhtruncrl   r   r   r   simple_htrunc#  s    
r{   c                 C   s.   t d}|t| k r*t j|| | |< d S r   )r   r   rk   r<   Zhrintrl   r   r   r   simple_hrint*  s    
r|   c                 C   s   t || d< d S r   )r   ZcbrtrN   r   r   r   simple_cbrt1  s    r}   c                 C   s   t || d< d S r   )r   Zbrevr4   r   r   r   simple_brev5  s    r~   c                 C   s   t || d< d S r   )r   Zclzr4   r   r   r   
simple_clz9  s    r   c                 C   s   t || d< d S r   )r   Zffsr4   r   r   r   
simple_ffs=  s    r   c                 C   s   t || d< d S r   roundr4   r   r   r   simple_roundA  s    r   c                 C   s   t ||| d< d S r   r   )r   r,   ndigitsr   r   r   simple_round_toE  s    r   c                 C   sF   t d}| | dkr:|d dkr0|| | |< qBd| |< nd| |< d S )Nr      r"   r         r   )r8   r9   r,   r   r   r   r   branching_with_ifsI  s    

r   c                 C   sB   t d}t |d dk|| d}t | | dk|d| |< d S )Nr   r"   r   r   r   r   )r   r   selp)r8   r9   r,   r   innerr   r   r   branching_with_selpsU  s    
r   c                 C   s   t d}t j| |< d S r   )r   r   Zlaneidr   r   r   r   simple_laneid\  s    
r   c                 C   s   t j| d< d S r   )r   Zwarpsize)r   r   r   r   simple_warpsizea  s    r   c                 C   s   t |  d S r   r   r   r   r   r   nonliteral_gride  s    r   c                 C   s   t |  d S r   )r   r%   r   r   r   r   nonliteral_gridsizei  s    r   c                       s  e Zd Z fddZdd Zdd Zdd Zed	d
d Zed	dd Z	dd Z
dd Zdd Zeddd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zed'd( Zed)d* Zed+d,d- Zed.d/ Zed0d1 Zed+d2d3 Zed4d5 Zed6d7 Zed+d8d9 Zed:d; Z ed<d= Z!ed+d>d? Z"ed@dA Z#edBdC Z$edDdE Z%edFdG Z&ed+dHdI Z'edJdK Z(edLdM Z)ed+dNdO Z*edPdQ Z+edRdS Z,edTdU Z-edVdW Z.edXdY Z/edZd[ Z0d\d] Z1d^d_ Z2d`da Z3edbdcdd Z4dedf Z5dgdh Z6didj Z7dkdl Z8edbdmdn Z9dodp Z:dqdr Z;dsdt Z<dudv Z=edbdwdx Z>dydz Z?d{d| Z@d}d~ ZAdd ZBdd ZCeddd ZDdd ZEdd ZFeddd ZGdd ZH  ZIS )TestCudaIntrinsicc                    s   t    tjd d S r   )supersetUpnprandomseedself	__class__r   r   r   n  s    
zTestCudaIntrinsic.setUpc                 C   s@   t dt}tjdtjd}|d | | |d dk d S )Nvoid(int32[:])r   rv   r   r   r   )r   jitr   r   onesint32
assertTruer   compiledr   r   r   r   test_simple_threadidxr  s    z'TestCudaIntrinsic.test_simple_threadidxc                 C   sZ   t dt}d}tj|tjd}tj|tjd}|d|f | | t||k d S )Nr   
   r   r   )	r   r   r   r   r   r   aranger   all)r   r   Nr   expr   r   r   test_fill_threadidxx  s    z%TestCudaIntrinsic.test_fill_threadidxc                    sN   d\  fdd} fdd}| }| }|  t||k d S )N)r         c                     s>   t dt} tj ftjd}| d ff | |S )Nzvoid(int32[:,:,::1])r   r   )r   r   r   r   zerosr   r   r   XYZr   r   c_contigous  s    z<TestCudaIntrinsic.test_fill3d_threadidx.<locals>.c_contigousc                     sD   t dt} ttj ftjd}| d ff | |S )Nzvoid(int32[::1,:,:])r   r   )r   r   r   r   Zasfortranarrayr   r   r   r   r   r   f_contigous  s    z<TestCudaIntrinsic.test_fill3d_threadidx.<locals>.f_contigous)r   r   r   )r   r   r   Zc_resZf_resr   r   r   test_fill3d_threadidx  s    
z'TestCudaIntrinsic.test_fill3d_threadidxzCudasim does not check typesc              	   C   s*   |  td tdt W 5 Q R X d S NZRequireLiteralValuezvoid(int32))assertRaisesRegexr   r   r   r   r   r   r   r   test_nonliteral_grid_error  s    z,TestCudaIntrinsic.test_nonliteral_grid_errorc              	   C   s*   |  td tdt W 5 Q R X d S r   )r   r   r   r   r   r   r   r   r   test_nonliteral_gridsize_error  s    z0TestCudaIntrinsic.test_nonliteral_gridsize_errorc                 C   s\   t dt}d\}}|| }tj|tjd}|||f | | t|t|k d S )Nvoid(int32[::1])r      r   )	r   r   r    r   emptyr   r   r   r   )r   r   ntidnctaidZnelemr   r   r   r   test_simple_grid1d  s    z$TestCudaIntrinsic.test_simple_grid1dc           	      C   s   t dt}d}d}|d |d  |d |d  f}tj|tjd}| }|||f | t|jd D ](}t|jd D ]}|| |||f< q~ql| 	t
||k d S Nzvoid(int32[:,::1])r   r   r   r   r   r   r   )r   r   r#   r   r   r   copyr+   r*   r   r   )	r   r   r   r   r*   r   r   r   r   r   r   r   test_simple_grid2d  s     z$TestCudaIntrinsic.test_simple_grid2dc                 C   sN   t dt}d\}}tjdtjd}|||f | | |d ||  d S )Nr   r   r   r   r   )r   r   r&   r   r   r   assertEqualr   r   r   r   r   r   r   r   test_simple_gridsize1d  s
    z(TestCudaIntrinsic.test_simple_gridsize1dzTests PTX emissionc           
      C   s  t d d  t t d d  f}t|t}t|t}d}d}tjddtj d}| }d|d d< tj|tj d}||df ||| |	|}	| 
d	ttd
|	 tjj||dd tj|tj d}||df ||| |	|}	| 
dttd
|	 tjj||dd d S )N    r      )r*   Z
fill_valuerv   r   r   r   r   r"   z	\s+bra\s+Z	branching)err_msgr   r   )r   r   r   r   r   r   fullr   r   Zinspect_asmr   rk   refindalltestingZassert_array_equal)
r   sigZcu_branching_with_ifsZcu_branching_with_selpsnr9   r,   expectedr8   ptxr   r   r   	test_selp  s$    

zTestCudaIntrinsic.test_selpc                 C   sr   t dt}d}d}tjdtjd}|||f | | |d |d |d   | |d |d |d   d S )Nr   r   r   r"   r   r   r   )r   r   r'   r   r   r   r   r   r   r   r   test_simple_gridsize2d  s    z(TestCudaIntrinsic.test_simple_gridsize2dc              	   C   s   t dt}d}d}|d |d  |d |d  f}tj|tjd}|||f | |\}}|j\}}	tt|d t|d D ]j\}
}||
 ||  }}t||	|D ]B}t|||D ]0}| 	|||f || k|||f || f qqqd S r   )
r   r   r3   r   r   r   r*   zipr+   r   )r   r   r   r   r*   r   r/   r0   r1   r2   r   r   r-   r.   r   r   r   r   r   test_intrinsic_forloop_step  s     
"z-TestCudaIntrinsic.test_intrinsic_forloop_stepc                 C   sF   t jdd }tjdtjdddd}|d | tj|d d S )Nc                 S   s:   t d\}}}t d\}}}|| | | |||f< d S Nr   r$   )outr   r   r   r8   r9   r,   r   r   r   foo  s    z*TestCudaIntrinsic.test_3dgrid.<locals>.fooi  r   	   )r   r   r   r   )r   r   r   r   r   reshaper   Zassert_equal)r   r   arrr   r   r   test_3dgrid  s
    
zTestCudaIntrinsic.test_3dgridc                 C   sZ   t jdd }d\}}}tj|| | tjd|||}|d | | t| d S )Nc           	      S   s   t d\}}}t d\}}}|t jjt jjt jj  kor|t jjt jjt jj  kor|t jjt jjt jj  k}|t jjt j	j ko|t jjt j	j ko|t jjt j	j k}|o|| |||f< d S r   )
r   r   r%   r   r   ZblockIdxr)   r   r   r(   )	r   r   r   r   r8   r9   r,   Zgrid_is_rightZgridsize_is_rightr   r   r   r     s    z,TestCudaIntrinsic.test_3dgrid_2.<locals>.foo)   r      r   ))r   r   r"   )r   r"   r   )r   r   r   r   Zbool_r   r   r   )r   r   r   r   r   r   r   r   r   test_3dgrid_2  s    

"zTestCudaIntrinsic.test_3dgrid_2c                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nvoid(int32[:], uint32)r   r   r      r   r   r   r   r5   r   r   r   assertEqualsr   r   r   r   test_popc_u4  s    zTestCudaIntrinsic.test_popc_u4c                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nzvoid(int32[:], uint64)r   r   r   l        @ r   r   r   r   r   r   r   test_popc_u8  s    zTestCudaIntrinsic.test_popc_u8c                 C   sF   t dt}tjdtjd}|d |ddd tj|d d	 d S )
Nzvoid(f4[:], f4, f4, f4)r   r   r          @      @      @r   r   )r   r   r:   r   r   float32r   assert_allcloser   r   r   r   test_fma_f4  s    zTestCudaIntrinsic.test_fma_f4c                 C   sF   t dt}tjdtjd}|d |ddd tj|d d	 d S )
Nzvoid(f8[:], f8, f8, f8)r   r   r   r   r   r   r   r   )r   r   r:   r   r   float64r   r   r   r   r   r   test_fma_f8"  s    zTestCudaIntrinsic.test_fma_f8c                 C   sl   t dt}tjdtjd}tjdgtjd}tjdgtjd}|d ||| tj|d ||  d S Nvoid(f2[:], f2[:], f2[:])r   r   r   r   r   r   )	r   r   r>   r   r   float16arrayr   r   r   r   r   arg1arg2r   r   r   	test_hadd(  s    zTestCudaIntrinsic.test_haddc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S )Nvoid(f2[:], f2, f2)r   r   JM!	@r   r   r   )r   r   r?   r   r   r   r   r   r   r   r   r   r   refr   r   r   test_hadd_scalar1  s    

z"TestCudaIntrinsic.test_hadd_scalarz(Compilation unsupported in the simulatorc                 C   s4   t d d  t t f}tt|dd\}}| d| d S )Nr   r   cczadd.f16)r   r   r?   assertInr   argsr   _r   r   r   test_hadd_ptx;  s    zTestCudaIntrinsic.test_hadd_ptxc                 C   s   t dt}tjdtjd}tjdgtjd}tjdgtjd}tjdgtjd}|d |||| tj|d || |  d S )	Nz void(f2[:], f2[:], f2[:], f2[:])r   r   r   r   r   r   r   )	r   r   rA   r   r   r   r   r   r   )r   r   r   r   r   arg3r   r   r   	test_hfmaA  s    zTestCudaIntrinsic.test_hfmac                 C   sp   t dt}tjdtjd}td}td}td}|d |||| || | }tj|d | d S )	Nzvoid(f2[:], f2, f2, f2)r   r   r   r   r   r   r   )r   r   rB   r   r   r   r   r   )r   r   r   r   r   r  r   r   r   r   test_hfma_scalarK  s    


z"TestCudaIntrinsic.test_hfma_scalarc                 C   s6   t d d  t t t f}tt|dd\}}| d| d S )Nr   r   z
fma.rn.f16)r   r   rB   r  r  r   r   r   test_hfma_ptxV  s    zTestCudaIntrinsic.test_hfma_ptxc                 C   sl   t dt}tjdtjd}tjdgtjd}tjdgtjd}|d ||| tj|d ||  d S r   )	r   r   rD   r   r   r   r   r   r   r   r   r   r   	test_hsub\  s    zTestCudaIntrinsic.test_hsubc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S Nr   r   r   r   gQ?r   r   )r   r   rE   r   r   r   r   r   r   r   r   r   test_hsub_scalare  s    

z"TestCudaIntrinsic.test_hsub_scalarc                 C   s4   t d d  t t f}tt|dd\}}| d| d S )Nr   r   zsub.f16)r   r   rE   r  r  r   r   r   test_hsub_ptxo  s    zTestCudaIntrinsic.test_hsub_ptxc                 C   sj   t  t}tjdtjd}tjdgtjd}tjdgtjd}|d ||| tj|d ||  d S )Nr   r   r   r   r   r   )	r   r   rG   r   r   r   r   r   r   r   r   r   r   	test_hmulu  s    zTestCudaIntrinsic.test_hmulc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S r  )r   r   rH   r   r   r   r   r   r   r   r   r   test_hmul_scalar~  s    

z"TestCudaIntrinsic.test_hmul_scalarc                 C   s4   t d d  t t f}tt|dd\}}| d| d S )Nr   r   zmul.f16)r   r   rH   r  r  r   r   r   test_hmul_ptx  s    zTestCudaIntrinsic.test_hmul_ptxc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S r  )r   r   rJ   r   r   r   r   r   r   r   r   r   test_hdiv_scalar  s    

z"TestCudaIntrinsic.test_hdiv_scalarc                 C   s   t dt}tjjddddtj}tjjddddtj}tj|tjd}|	|j
||| || }tj|| d S )Nr   i    i  rK   r   )r   r   rL   r   r   randintastyper   
zeros_likeforallrK   r   r   )r   r   Zarry1Zarry2r   r   r   r   r   	test_hdiv  s    zTestCudaIntrinsic.test_hdivc                 C   sV   t dt}tjdtjd}tjdgtjd}|d || tj|d |  d S )Nvoid(f2[:], f2[:])r   r   r   r   r   )	r   r   rO   r   r   r   r   r   r   r   r   r   r   r   r   r   	test_hneg  s
    zTestCudaIntrinsic.test_hnegc                 C   sR   t dt}tjdtjd}td}|d || | }tj|d | d S )Nvoid(f2[:], f2)r   r   r   r   r   )r   r   rP   r   r   r   r   r   r   r   r   r   r   r   r   r   test_hneg_scalar  s    
z"TestCudaIntrinsic.test_hneg_scalarc                 C   s2   t d d  t f}tt|dd\}}| d| d S )Nr   r   zneg.f16)r   r   rP   r  r  r   r   r   test_hneg_ptx  s    zTestCudaIntrinsic.test_hneg_ptxc                 C   sV   t  t}tjdtjd}tjdgtjd}|d || tj|d t	| d S )Nr   r         r   r   )
r   r   rR   r   r   r   r   r   r   absr  r   r   r   	test_habs  s
    zTestCudaIntrinsic.test_habsc                 C   sT   t dt}tjdtjd}td}|d || t|}tj|d | d S )Nr  r   r   gJM!	r   r   )	r   r   rS   r   r   r   r"  r   r   r  r   r   r   test_habs_scalar  s    
z"TestCudaIntrinsic.test_habs_scalarc                 C   s2   t d d  t f}tt|dd\}}| d| d S )Nr   r   zabs.f16)r   r   rS   r  r  r   r   r   test_habs_ptx  s    zTestCudaIntrinsic.test_habs_ptxc                 C   s  t ttttttttt	t
tf}ttf}tjtjtjtjtjtjtjtjtjtjtjtf}tjtjf}d}tjd tjjdd|d tj!}t"|}t#||D ]\\}}	| j$|	dB t%&d|}|d|f || |	|tj!d}
tj'(||
 W 5 Q R X qtjjdd|d tj!}t#||D ]^\}}	| j$|	dB t%&d|}|d|f || |	|tj!d}
tj'(||
 W 5 Q R X q"d S )	Nr   r   r  r  fnr  r   r   ))rm   rn   ro   rp   rq   rt   rx   ry   rz   r{   r|   ru   rr   rs   r   sincosloglog2log10sqrtceilfloorZ
reciprocaltruncZrintrw   r   Zexp2r   r   r  r  r   r  r   subTestr   r   r   r   )r   ZkernelsZexp_kernelsZexpected_functionsZexpected_exp_functionsr   r   rc   kernelr'  r   Zx2r   r   r   test_fp16_intrinsics_common  sV                
z-TestCudaIntrinsic.test_fp16_intrinsics_commonc                 C   sf   t  dd }d}tjd tj|tj}t|}|d|f || tj	
|d|  d S )Nc                 S   s.   t d}|t| k r*t j|| | |< d S r   )r   r   rk   r<   Zhexp10rl   r   r   r   hexp10_vectors  s    
z5TestCudaIntrinsic.test_hexp10.<locals>.hexp10_vectorsr   r   r   )r   r   r   r   r   Zrandr  r   r  r   r   )r   r4  r   r   rc   r   r   r   test_hexp10  s    

zTestCudaIntrinsic.test_hexp10c              
   C   s&  t tttttf}tjtjtj	tj
tjtjf}t||D ]\}}| j|d td|}tjdtjd}tjdtjd}td}td}	td}
|d ||	|	 ||	|	}| ||d	  |d ||	|
 ||	|
}| ||d	  |d ||	| ||	|}| ||d	  W 5 Q R X q6d S )
N)opzvoid(b1[:], f2, f2)r   r   r"   r   r   r   r   )rT   rV   rX   rY   r[   r^   operatoreqnegegtleltr   r1  r   r   r   r   bool8r   r   )r   fnsZopsr'  r6  r2  r   gotr   r  Zarg4r   r   r   test_fp16_comparison  s6        





z&TestCudaIntrinsic.test_fp16_comparisonc              
   C   s   t ttttf}|D ]x}| j|db td|}tj	dtj
d}td}td}td}|d |||| | |d	  W 5 Q R X qd S )
Nr&  zvoid(b1[:], f2, f2, f2)r   r   r   r   r   r   r   )rd   re   rf   rg   rh   r1  r   r   r   r   r>  r   r   )r   Z	functionsr'  r   r   r   r   r  r   r   r   !test_multiple_float16_comparisons(  s    


z3TestCudaIntrinsic.test_multiple_float16_comparisonsc                 C   s   t dt}tjdtjd}td}td}|d ||| tj|d | td}|d ||| tj|d | d S 	Nr   r   r   r   r   r   r   g      @)r   r   ri   r   r   r   r   r   r   r   r   r   	test_hmax9  s    


zTestCudaIntrinsic.test_hmaxc                 C   s   t dt}tjdtjd}td}td}|d ||| tj|d | td}|d ||| tj|d | d S rC  )r   r   rj   r   r   r   r   r   r   r   r   r   	test_hminE  s    


zTestCudaIntrinsic.test_hminc                 C   sJ   t dt}tjdtjd}d}|d || tj|d |d  d S )Nzvoid(float32[:], float32)r   r   r   r   r   UUUUUU?)r   r   r}   r   r   r   r   r   r   r   r   Zcbrt_argr   r   r   test_cbrt_f32Q  s
    zTestCudaIntrinsic.test_cbrt_f32c                 C   sJ   t dt}tjdtjd}d}|d || tj|d |d  d S )Nzvoid(float64[:], float64)r   r   g      @r   r   rF  )r   r   r}   r   r   r   r   r   rG  r   r   r   test_cbrt_f64X  s
    zTestCudaIntrinsic.test_cbrt_f64c                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nzvoid(uint32[:], uint32)r   r   r   i0  r   i  )r   r   r~   r   r   Zuint32r   r   r   r   r   test_brev_u4_  s    zTestCudaIntrinsic.test_brev_u4z.only get given a Python "int", assumes 32 bitsc                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nzvoid(uint64[:], uint64)r   r   r   l   0  C r   l       `x)r   r   r~   r   r   Zuint64r   r   r   r   r   test_brev_u8e  s    zTestCudaIntrinsic.test_brev_u8c                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nvoid(int32[:], int32)r   r   r      r      r   r   r   r   r   r   r   r   r   r   r   test_clz_i4l  s    zTestCudaIntrinsic.test_clz_i4c                 C   s@   t dt}tjdtjd}|d |d | |d d dS )	a  
        Although the CUDA Math API
        (http://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html)
        only says int32 & int64 arguments are supported in C code, the LLVM
        IR input supports i8, i16, i32 & i64 (LLVM doesn't have a concept of
        unsigned integers, just unsigned operations on integers).
        http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics
        r   r   r   r   rM  r   rN  NrO  r   r   r   r   test_clz_u4r  s    	zTestCudaIntrinsic.test_clz_u4c                 C   s@   t dt}tjdtjd}|d |d | |d d d S NrL  r   r   r   l    r   rO  r   r   r   r   test_clz_i4_1s  s    z TestCudaIntrinsic.test_clz_i4_1sc                 C   sB   t dt}tjdtjd}|d |d | |d dd d S )NrL  r   r   r   r   r   CUDA semanticsrO  r   r   r   r   test_clz_i4_0s  s    z TestCudaIntrinsic.test_clz_i4_0sc                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nvoid(int32[:], int64)r   r   r      r   /   rO  r   r   r   r   test_clz_i8  s    zTestCudaIntrinsic.test_clz_i8c                 C   s^   t dt}tjdtjd}|d |d | |d d |d |d | |d d	 d S )
NrL  r   r   r   rM  r              r   r   r   r   r   r   r   r   r   r   r   r   test_ffs_i4  s    zTestCudaIntrinsic.test_ffs_i4c                 C   s^   t dt}tjdtjd}|d |d | |d d |d |d | |d d	 d S )
Nr   r   r   r   rM  r   rZ  r[  r   r\  r   r   r   r   test_ffs_u4  s    zTestCudaIntrinsic.test_ffs_u4c                 C   s@   t dt}tjdtjd}|d |d | |d d d S rR  r\  r   r   r   r   test_ffs_i4_1s  s    z TestCudaIntrinsic.test_ffs_i4_1sc                 C   s@   t dt}tjdtjd}|d |d | |d d d S )NrL  r   r   r   r   r\  r   r   r   r   test_ffs_i4_0s  s    z TestCudaIntrinsic.test_ffs_i4_0sc                 C   s^   t dt}tjdtjd}|d |d | |d d |d |d | |d d	 d S )
NrV  r   r   r   rW  r   r   l        !   r\  r   r   r   r   test_ffs_i8  s    zTestCudaIntrinsic.test_ffs_i8c                 C   sj   t dt}d}tj|d tjd}ttjdtjd|}|d|d f | | t	||k d S )Nr   r"   r   r   r   )
r   r   r   r   r   r   Ztiler   r   r   )r   r   countr   r   r   r   r   test_simple_laneid  s    z$TestCudaIntrinsic.test_simple_laneidc                 C   s@   t dt}tjdtjd}|d | | |d dd d S )Nr   r   r   r   r   r   rT  )r   r   r   r   r   r   r   r   r   r   r   test_simple_warpsize  s    z&TestCudaIntrinsic.test_simple_warpsizec                 C   sN   t dt}tjdtjd}dD ]&}|d || | |d t| q"d S )Nzvoid(int64[:], float32)r   r   r!  g      g      g      g      ?g      @g      @g      @r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   test_round_f4  s
    zTestCudaIntrinsic.test_round_f4c                 C   sN   t dt}tjdtjd}dD ]&}|d || | |d t| q"d S )Nzvoid(int64[:], float64)r   r   rf  r   r   rg  rh  r   r   r   test_round_f8  s
    zTestCudaIntrinsic.test_round_f8c              
   C   s   t dt}tjdtjd}tjd tjdtj}t	|t
tjtj tjgf d}t||D ]L\}}| j||d0 |d ||| | j|d	 t||d
d W 5 Q R X qpd S )N void(float32[:], float32, int32)r   r   {   r   )r   r   r"   r   r   r   r   valr   r   r   singleprec)r   r   r   r   r   r   r   r   r  concatenater   infnan	itertoolsproductr1  assertPreciseEqualr   r   r   r   valsdigitsrs  r   r   r   r   test_round_to_f4  s    "	z"TestCudaIntrinsic.test_round_to_f4z$Overflow behavior differs on CPythonc                 C   sT   t dt}tjdtjd}ttjj}d}|d ||| | |d | d S )Nrk  r   r   i,  r   r   )	r   r   r   r   r   r   finfomaxr   r   r   r   rs  r   r   r   r   test_round_to_f4_overflow  s    z+TestCudaIntrinsic.test_round_to_f4_overflowc                 C   sT   t dt}tjdtjd}d}d}|d ||| | j|d t||dd	 d S )
Nrk  r   r   gQ?r   r   r   rt  ru  )r   r   r   r   r   r   r|  r   r  r   r   r   test_round_to_f4_halfway  s    z*TestCudaIntrinsic.test_round_to_f4_halfwayc              
   C   s  t dt}tjdtjd}tjd tjd}t|t	tj
tj
 tjgf d}t||D ]L\}}| j||d0 |d ||| | j|d	 t||d
d W 5 Q R X qhd}d}| j||d0 |d ||| | j|d	 t||dd W 5 Q R X d S )N void(float64[:], float64, int32)r   r   rl  r   )rm  rn  ro  rp  rq  r   r   r"   r   r   r   rr  r   r   exactru  g`8p=<   double)r   r   r   r   r   r   r   r   rw  r   rx  ry  rz  r{  r1  r|  r   r}  r   r   r   test_round_to_f8  s&    "z"TestCudaIntrinsic.test_round_to_f8c                 C   sT   t dt}tjdtjd}ttjj}d}|d ||| | |d | d S )Nr  r   r   r   r   r   )	r   r   r   r   r   r   r  r  r   r  r   r   r   test_round_to_f8_overflow  s    z+TestCudaIntrinsic.test_round_to_f8_overflowc                 C   sT   t dt}tjdtjd}d}d}|d ||| | j|d t||dd	 d S )
Nr  r   r   g\(\?r   r   r   r  ru  )r   r   r   r   r   r   r|  r   r  r   r   r   test_round_to_f8_halfway,  s    z*TestCudaIntrinsic.test_round_to_f8_halfway)J__name__
__module____qualname__r   r   r   r   r	   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r
   r   r   r  r  r	  r
  r  r  r  r  r  r  r  r  r  r  r   r#  r$  r%  r3  r5  rA  rB  rD  rE  rH  rI  rJ  rK  rP  rQ  rS  rU  rY  r]  r^  r_  r`  rb  rd  re  ri  rj  r  r  r  r  r  r  __classcell__r   r   r   r   r   m  s   





	

	




	


	











"










r   __main__)Yrz  numpyr   r7  r   Znumbar   r   Z
numba.cudar   Znumba.core.errorsr   Znumba.core.typesr   Znumba.cuda.testingr   r   r	   r
   r   r   r   r    r#   r&   r'   r3   r5   r:   r>   r?   rA   rB   rD   rE   rG   rH   rJ   rL   rO   rP   rR   rS   rT   rV   rX   rY   r[   r^   r   r`   ra   rd   re   rf   rg   rh   ri   rj   rm   rn   ro   rp   rq   rr   rs   rt   ru   rw   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   r   r   r   r   r   r   r   r  mainr   r   r   r   <module>   s   



     P
