U
    ,dJ+                     @   sH  d Z ddlmZ ddlZddlZddlZddlmZ ddlZ	ddl
mZ ddlmZ G dd	 d	eZG d
d dZG dd dZG dd deZG dd deZG dd deZe Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze ZG dd deZ G dd deZ!G dd deZ"edd Z#dS )zf
Implements the cuda module as called from within an executing kernel
(@cuda.jit-decorated function).
    )contextmanagerN)types)numpy_support   )vector_typesc                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )Dim3z;
    Used to implement thread/block indices/dimensions
    c                 C   s   || _ || _|| _d S Nxyz)selfr
   r   r    r   B/tmp/pip-unpacked-wheel-eu7e0c37/numba/cuda/simulator/kernelapi.py__init__   s    zDim3.__init__c                 C   s   d| j | j| jf S )Nz(%s, %s, %s)r	   r   r   r   r   __str__   s    zDim3.__str__c                 C   s   d| j | j| jf S )NzDim3(%s, %s, %s)r	   r   r   r   r   __repr__   s    zDim3.__repr__c                 c   s   | j V  | jV  | jV  d S r   r	   r   r   r   r   __iter__!   s    zDim3.__iter__N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r      s
   r   c                   @   s   e Zd ZdZdd ZdS )	GridGroupz+
    Used to implement the grid group.
    c                 C   s   t    d S r   	threadingcurrent_threadsyncthreadsr   r   r   r   sync,   s    zGridGroup.syncN)r   r   r   r   r   r   r   r   r   r   '   s   r   c                   @   s   e Zd ZdZdd ZdS )
FakeCUDACgz!
    CUDA Cooperative Groups
    c                 C   s   t  S r   )r   r   r   r   r   	this_grid7   s    zFakeCUDACg.this_gridN)r   r   r   r   r    r   r   r   r   r   3   s   r   c                   @   s   e Zd ZdZdd ZdS )FakeCUDALocalz
    CUDA Local arrays
    c                 C   s"   t |tjrt|}t||S r   )
isinstancer   Typer   as_dtypenpempty)r   shapedtyper   r   r   array?   s    
zFakeCUDALocal.arrayN)r   r   r   r   r)   r   r   r   r   r!   ;   s   r!   c                   @   s   e Zd ZdZdd ZdS )FakeCUDAConstz
    CUDA Const arrays
    c                 C   s   |S r   r   )r   Zaryr   r   r   
array_likeI   s    zFakeCUDAConst.array_likeN)r   r   r   r   r+   r   r   r   r   r*   E   s   r*   c                   @   s    e Zd ZdZdd Zdd ZdS )FakeCUDAShareda  
    CUDA Shared arrays.

    Limitations: assumes that only one call to cuda.shared.array is on a line,
    and that that line is only executed once per thread. i.e.::

        a = cuda.shared.array(...); b = cuda.shared.array(...)

    will erroneously alias a and b, and::

        for i in range(10):
            sharedarrs[i] = cuda.shared.array(...)

    will alias all arrays created at that point (though it is not certain that
    this would be supported by Numba anyway).
    c                 C   s"   i | _ || _tj|tjd| _d S )N)r(   )_allocations_dynshared_sizer%   zerosbyte
_dynshared)r   dynshared_sizer   r   r   r   _   s    zFakeCUDAShared.__init__c                 C   s   t |tjrt|}|dkr>| j|j }tj| j	j
||dS tt }|d dd }| j|}|d krt||}|| j|< |S )Nr   )r(   count   )r"   r   r#   r   r$   r.   itemsizer%   Z
frombufferr1   data	tracebackextract_stacksys	_getframer-   getr&   )r   r'   r(   r3   stackZcallerresr   r   r   r)   d   s    

zFakeCUDAShared.arrayN)r   r   r   r   r   r)   r   r   r   r   r,   M   s   r,   c                   @   st   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd ZdS )FakeCUDAAtomicc              	   C   s,   t  || }||  |7  < W 5 Q R X |S r   )addlockr   r)   indexvaloldr   r   r   add   s    zFakeCUDAAtomic.addc              	   C   s,   t  || }||  |8  < W 5 Q R X |S r   )sublockrA   r   r   r   sub   s    zFakeCUDAAtomic.subc              	   C   s,   t  || }||  |M  < W 5 Q R X |S r   )andlockrA   r   r   r   and_   s    zFakeCUDAAtomic.and_c              	   C   s,   t  || }||  |O  < W 5 Q R X |S r   )orlockrA   r   r   r   or_   s    zFakeCUDAAtomic.or_c              	   C   s,   t  || }||  |N  < W 5 Q R X |S r   )xorlockrA   r   r   r   xor   s    zFakeCUDAAtomic.xorc              	   C   s>   t 0 || }||kr d||< n||  d7  < W 5 Q R X |S Nr   r   )inclockrA   r   r   r   inc   s    
zFakeCUDAAtomic.incc              	   C   sF   t 8 || }|dks||kr(|||< n||  d8  < W 5 Q R X |S rN   )declockrA   r   r   r   dec   s    
zFakeCUDAAtomic.decc              	   C   s$   t  || }|||< W 5 Q R X |S r   )exchlockrA   r   r   r   exch   s    zFakeCUDAAtomic.exchc              	   C   s*   t  || }t||||< W 5 Q R X |S r   )maxlockmaxrA   r   r   r   rV      s    zFakeCUDAAtomic.maxc              	   C   s*   t  || }t||||< W 5 Q R X |S r   )minlockminrA   r   r   r   rX      s    zFakeCUDAAtomic.minc              	   C   s2   t $ || }t|| |g||< W 5 Q R X |S r   )rU   r%   nanmaxrA   r   r   r   rY      s     zFakeCUDAAtomic.nanmaxc              	   C   s2   t $ || }t|| |g||< W 5 Q R X |S r   )rW   r%   nanminrA   r   r   r   rZ      s     zFakeCUDAAtomic.nanminc              
   C   sB   t 4 d|j }|| }||kr(|||< |W  5 Q R  S Q R X d S )N)r   )caslockndim)r   r)   rD   rC   rB   Zloadedr   r   r   compare_and_swap   s    
zFakeCUDAAtomic.compare_and_swapN)r   r   r   rE   rG   rI   rK   rM   rP   rR   rT   rV   rX   rY   rZ   r]   r   r   r   r   r?      s   		r?   c                   @   s|   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd ZdS )FakeCUDAFp16c                 C   s   || S r   r   r   abr   r   r   hadd   s    zFakeCUDAFp16.haddc                 C   s   || S r   r   r_   r   r   r   hsub   s    zFakeCUDAFp16.hsubc                 C   s   || S r   r   r_   r   r   r   hmul   s    zFakeCUDAFp16.hmulc                 C   s   || | S r   r   r   r`   ra   cr   r   r   hfma   s    zFakeCUDAFp16.hfmac                 C   s   | S r   r   r   r`   r   r   r   hneg   s    zFakeCUDAFp16.hnegc                 C   s   t |S r   )absrh   r   r   r   habs   s    zFakeCUDAFp16.habsc                 C   s   ||kS r   r   r_   r   r   r   heq   s    zFakeCUDAFp16.heqc                 C   s   ||kS r   r   r_   r   r   r   hne   s    zFakeCUDAFp16.hnec                 C   s   ||kS r   r   r_   r   r   r   hge   s    zFakeCUDAFp16.hgec                 C   s   ||kS r   r   r_   r   r   r   hgt   s    zFakeCUDAFp16.hgtc                 C   s   ||kS r   r   r_   r   r   r   hle   s    zFakeCUDAFp16.hlec                 C   s   ||k S r   r   r_   r   r   r   hlt  s    zFakeCUDAFp16.hltc                 C   s
   t ||S r   )rV   r_   r   r   r   hmax  s    zFakeCUDAFp16.hmaxc                 C   s
   t ||S r   )rX   r_   r   r   r   hmin	  s    zFakeCUDAFp16.hminN)r   r   r   rb   rc   rd   rg   ri   rk   rl   rm   rn   ro   rp   rq   rr   rs   r   r   r   r   r^      s   r^   c                   @   s  e Zd ZdZdd Zedd Zedd Zedd	 Zed
d Z	edd Z
edd Zedd Zedd Zedd Zedd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8S )9FakeCUDAModulea7  
    An instance of this class will be injected into the __globals__ for an
    executing function in order to implement calls to cuda.*. This will fail to
    work correctly if the user code does::

        from numba import cuda as something_else

    In other words, the CUDA module must be called cuda.
    c                 C   s   t | | _t | | _t | _t | _t|| _t	 | _
t | _t | _t D ],\}}t| || |jD ]}t| || qhqNd S r   )r   gridDimblockDimr   _cgr!   _localr,   _sharedr*   _constr?   _atomicr^   _fp16r   itemssetattraliases)r   Zgrid_dimZ	block_dimr2   nameZsvtyaliasr   r   r   r     s    



zFakeCUDAModule.__init__c                 C   s   | j S r   )rw   r   r   r   r   cg+  s    zFakeCUDAModule.cgc                 C   s   | j S r   )rx   r   r   r   r   local/  s    zFakeCUDAModule.localc                 C   s   | j S r   )ry   r   r   r   r   shared3  s    zFakeCUDAModule.sharedc                 C   s   | j S r   )rz   r   r   r   r   const7  s    zFakeCUDAModule.constc                 C   s   | j S r   )r{   r   r   r   r   atomic;  s    zFakeCUDAModule.atomicc                 C   s   | j S r   )r|   r   r   r   r   fp16?  s    zFakeCUDAModule.fp16c                 C   s
   t  jS r   )r   r   	threadIdxr   r   r   r   r   C  s    zFakeCUDAModule.threadIdxc                 C   s
   t  jS r   )r   r   blockIdxr   r   r   r   r   G  s    zFakeCUDAModule.blockIdxc                 C   s   dS N    r   r   r   r   r   warpsizeK  s    zFakeCUDAModule.warpsizec                 C   s   t  jd S r   )r   r   Z	thread_idr   r   r   r   laneidO  s    zFakeCUDAModule.laneidc                 C   s   t    d S r   r   r   r   r   r   r   S  s    zFakeCUDAModule.syncthreadsc                 C   s   d S r   r   r   r   r   r   threadfenceV  s    zFakeCUDAModule.threadfencec                 C   s   d S r   r   r   r   r   r   threadfence_blockZ  s    z FakeCUDAModule.threadfence_blockc                 C   s   d S r   r   r   r   r   r   threadfence_system^  s    z!FakeCUDAModule.threadfence_systemc                 C   s   t  |S r   )r   r   syncthreads_countr   rC   r   r   r   r   b  s    z FakeCUDAModule.syncthreads_countc                 C   s   t  |S r   )r   r   syncthreads_andr   r   r   r   r   e  s    zFakeCUDAModule.syncthreads_andc                 C   s   t  |S r   )r   r   syncthreads_orr   r   r   r   r   h  s    zFakeCUDAModule.syncthreads_orc                 C   s   t |dS )N1)binr3   r   r   r   r   popck  s    zFakeCUDAModule.popcc                 C   s   || | S r   r   re   r   r   r   fman  s    zFakeCUDAModule.fmac                 C   s   |d S )NgUUUUUU?r   rh   r   r   r   cbrtq  s    zFakeCUDAModule.cbrtc                 C   s   t d|d d d dS )N{:032b}r5   )intformatr   r   r   r   brevt  s    zFakeCUDAModule.brevc                 C   s    d |}t|t|d S )Nr   0)r   lenlstrip)r   rC   sr   r   r   clzw  s    
zFakeCUDAModule.clzc                 C   s,   d |}t|t|d d d }|S )Nr   r   r   !   )r   r   rstrip)r   rC   r   rr   r   r   ffs{  s    
zFakeCUDAModule.ffsc                 C   s   |r|S |S r   r   re   r   r   r   selp  s    zFakeCUDAModule.selpc                 C   s   | j }| j}| j}|j|j |j }|dkr0|S |j|j |j }|dkrR||fS |j|j |j }|dkrv|||fS td| d S )Nr   r5      z*Global ID has 1-3 dimensions. %d requested)rv   r   r   r
   r   r   RuntimeError)r   nbdimbidtidr
   r   r   r   r   r   grid  s    
zFakeCUDAModule.gridc                 C   sn   | j }| j}|j|j }|dkr$|S |j|j }|dkr@||fS |j|j }|dkr^|||fS td| d S )Nr   r5   r   z,Global grid has 1-3 dimensions. %d requested)rv   ru   r
   r   r   r   )r   r   r   Zgdimr
   r   r   r   r   r   gridsize  s    
zFakeCUDAModule.gridsizeN) r   r   r   r   r   propertyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rt     sL   











rt   c              	   #   sl   ddl m  | j}t fdd| D }tfdd| D }|| z
d V  W 5 || X d S )Nr   cudac                 3   s"   | ]\}}| kr||fV  qd S r   r   .0kvr   r   r   	<genexpr>  s      z&swapped_cuda_module.<locals>.<genexpr>c                 3   s   | ]\}}| fV  qd S r   r   r   )fake_cuda_moduler   r   r     s     )Znumbar   __globals__dictr}   update)fnr   Zfn_globsorigreplr   )r   r   r   swapped_cuda_module  s    

r   )$r   
contextlibr   r:   r   r8   Z
numba.corer   Znumpyr%   Znumba.npr   r   objectr   r   r   r!   r*   r,   Lockr@   rF   rH   rJ   rL   rU   rW   r[   rO   rQ   rS   r?   r^   rt   r   r   r   r   r   <module>   s>   
/X, 