U
    -em0                     @   sP  d Z ddlmZ ddlZddlZddlZddlmZ ddlZ	ddl
mZ ddlmZ G dd	 d	eZG d
d dZG dd dZG dd deZG dd deZG dd deZe Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Z G dd deZ!G dd deZ"G dd deZ#edd Z$dS )zf
Implements the cuda module as called from within an executing kernel
(@cuda.jit-decorated function).
    )contextmanagerN)types)numpy_support   )vector_typesc                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )Dim3z;
    Used to implement thread/block indices/dimensions
    c                 C   s   || _ || _|| _d S Nxyz)selfr
   r   r    r   _/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/numba/cuda/simulator/kernelapi.py__init__   s    zDim3.__init__c                 C   s   d| j | j| jf S )Nz(%s, %s, %s)r	   r   r   r   r   __str__   s    zDim3.__str__c                 C   s   d| j | j| jf S )NzDim3(%s, %s, %s)r	   r   r   r   r   __repr__   s    zDim3.__repr__c                 c   s   | j V  | jV  | jV  d S r   r	   r   r   r   r   __iter__!   s    zDim3.__iter__N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r      s
   r   c                   @   s   e Zd ZdZdd ZdS )	GridGroupz+
    Used to implement the grid group.
    c                 C   s   t    d S r   	threadingcurrent_threadsyncthreadsr   r   r   r   sync,   s    zGridGroup.syncN)r   r   r   r   r   r   r   r   r   r   '   s   r   c                   @   s   e Zd ZdZdd ZdS )
FakeCUDACgz!
    CUDA Cooperative Groups
    c                 C   s   t  S r   )r   r   r   r   r   	this_grid7   s    zFakeCUDACg.this_gridN)r   r   r   r   r    r   r   r   r   r   3   s   r   c                   @   s   e Zd ZdZdd ZdS )FakeCUDALocalz
    CUDA Local arrays
    c                 C   s"   t |tjrt|}t||S r   )
isinstancer   Typer   as_dtypenpempty)r   shapedtyper   r   r   array?   s    
zFakeCUDALocal.arrayN)r   r   r   r   r)   r   r   r   r   r!   ;   s   r!   c                   @   s   e Zd ZdZdd ZdS )FakeCUDAConstz
    CUDA Const arrays
    c                 C   s   |S r   r   )r   Zaryr   r   r   
array_likeI   s    zFakeCUDAConst.array_likeN)r   r   r   r   r+   r   r   r   r   r*   E   s   r*   c                   @   s    e Zd ZdZdd Zdd ZdS )FakeCUDAShareda  
    CUDA Shared arrays.

    Limitations: assumes that only one call to cuda.shared.array is on a line,
    and that that line is only executed once per thread. i.e.::

        a = cuda.shared.array(...); b = cuda.shared.array(...)

    will erroneously alias a and b, and::

        for i in range(10):
            sharedarrs[i] = cuda.shared.array(...)

    will alias all arrays created at that point (though it is not certain that
    this would be supported by Numba anyway).
    c                 C   s"   i | _ || _tj|tjd| _d S N)r(   )_allocations_dynshared_sizer%   Zzerosbyte
_dynshared)r   dynshared_sizer   r   r   r   _   s    zFakeCUDAShared.__init__c                 C   s   t |tjrt|}|dkr>| j|j }tj| j	j
||dS tt }|d dd }| j|}|d krt||}|| j|< |S )Nr   )r(   count   )r"   r   r#   r   r$   r/   itemsizer%   Z
frombufferr1   data	tracebackextract_stacksys	_getframer.   getr&   )r   r'   r(   r3   stackZcallerresr   r   r   r)   d   s    

zFakeCUDAShared.arrayN)r   r   r   r   r   r)   r   r   r   r   r,   M   s   r,   c                   @   s|   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd ZdS )FakeCUDAAtomicc              	   C   s,   t  || }||  |7  < W 5 Q R X |S r   )addlockr   r)   indexvaloldr   r   r   add   s    zFakeCUDAAtomic.addc              	   C   s,   t  || }||  |8  < W 5 Q R X |S r   )sublockrA   r   r   r   sub   s    zFakeCUDAAtomic.subc              	   C   s,   t  || }||  |M  < W 5 Q R X |S r   )andlockrA   r   r   r   and_   s    zFakeCUDAAtomic.and_c              	   C   s,   t  || }||  |O  < W 5 Q R X |S r   )orlockrA   r   r   r   or_   s    zFakeCUDAAtomic.or_c              	   C   s,   t  || }||  |N  < W 5 Q R X |S r   )xorlockrA   r   r   r   xor   s    zFakeCUDAAtomic.xorc              	   C   s>   t 0 || }||kr d||< n||  d7  < W 5 Q R X |S Nr   r   )inclockrA   r   r   r   inc   s    
zFakeCUDAAtomic.incc              	   C   sF   t 8 || }|dks||kr(|||< n||  d8  < W 5 Q R X |S rN   )declockrA   r   r   r   dec   s    
zFakeCUDAAtomic.decc              	   C   s$   t  || }|||< W 5 Q R X |S r   )exchlockrA   r   r   r   exch   s    zFakeCUDAAtomic.exchc              	   C   s*   t  || }t||||< W 5 Q R X |S r   )maxlockmaxrA   r   r   r   rV      s    zFakeCUDAAtomic.maxc              	   C   s*   t  || }t||||< W 5 Q R X |S r   )minlockminrA   r   r   r   rX      s    zFakeCUDAAtomic.minc              	   C   s2   t $ || }t|| |g||< W 5 Q R X |S r   )rU   r%   nanmaxrA   r   r   r   rY      s     zFakeCUDAAtomic.nanmaxc              	   C   s2   t $ || }t|| |g||< W 5 Q R X |S r   )rW   r%   nanminrA   r   r   r   rZ      s     zFakeCUDAAtomic.nanminc              
   C   sB   t 4 d|j }|| }||kr(|||< |W  5 Q R  S Q R X d S )N)r   )compare_and_swaplockndim)r   r)   rD   rC   rB   loadedr   r   r   compare_and_swap   s    
zFakeCUDAAtomic.compare_and_swapc              
   C   s8   t * || }||kr|||< |W  5 Q R  S Q R X d S r   )caslock)r   r)   rB   rD   rC   r]   r   r   r   cas   s
    zFakeCUDAAtomic.casN)r   r   r   rE   rG   rI   rK   rM   rP   rR   rT   rV   rX   rY   rZ   r^   r`   r   r   r   r   r?      s   		r?   c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Zd7d8 Zd9d: Zd;d< Z d=S )>FakeCUDAFp16c                 C   s   || S r   r   r   abr   r   r   hadd   s    zFakeCUDAFp16.haddc                 C   s   || S r   r   rb   r   r   r   hsub   s    zFakeCUDAFp16.hsubc                 C   s   || S r   r   rb   r   r   r   hmul   s    zFakeCUDAFp16.hmulc                 C   s   || S r   r   rb   r   r   r   hdiv   s    zFakeCUDAFp16.hdivc                 C   s   || | S r   r   r   rc   rd   cr   r   r   hfma   s    zFakeCUDAFp16.hfmac                 C   s   | S r   r   r   rc   r   r   r   hneg   s    zFakeCUDAFp16.hnegc                 C   s   t |S r   )absrl   r   r   r   habs   s    zFakeCUDAFp16.habsc                 C   s   t j|t jdS r-   )r%   sinfloat16r   r
   r   r   r   hsin   s    zFakeCUDAFp16.hsinc                 C   s   t j|t jdS r-   )r%   cosrq   rr   r   r   r   hcos  s    zFakeCUDAFp16.hcosc                 C   s   t j|t jdS r-   )r%   logrq   rr   r   r   r   hlog  s    zFakeCUDAFp16.hlogc                 C   s   t j|t jdS r-   )r%   log2rq   rr   r   r   r   hlog2  s    zFakeCUDAFp16.hlog2c                 C   s   t j|t jdS r-   )r%   log10rq   rr   r   r   r   hlog10  s    zFakeCUDAFp16.hlog10c                 C   s   t j|t jdS r-   )r%   exprq   rr   r   r   r   hexp  s    zFakeCUDAFp16.hexpc                 C   s   t j|t jdS r-   )r%   Zexp2rq   rr   r   r   r   hexp2  s    zFakeCUDAFp16.hexp2c                 C   s   t d| S )N
   r%   rq   rr   r   r   r   hexp10  s    zFakeCUDAFp16.hexp10c                 C   s   t j|t jdS r-   )r%   sqrtrq   rr   r   r   r   hsqrt  s    zFakeCUDAFp16.hsqrtc                 C   s   t |d S )Ng      r   rr   r   r   r   hrsqrt  s    zFakeCUDAFp16.hrsqrtc                 C   s   t j|t jdS r-   r%   ceilrq   rr   r   r   r   hceil  s    zFakeCUDAFp16.hceilc                 C   s   t j|t jdS r-   r   rr   r   r   r   hfloor   s    zFakeCUDAFp16.hfloorc                 C   s   t j|t jdS r-   )r%   Z
reciprocalrq   rr   r   r   r   hrcp#  s    zFakeCUDAFp16.hrcpc                 C   s   t j|t jdS r-   )r%   truncrq   rr   r   r   r   htrunc&  s    zFakeCUDAFp16.htruncc                 C   s   t j|t jdS r-   )r%   Zrintrq   rr   r   r   r   hrint)  s    zFakeCUDAFp16.hrintc                 C   s   ||kS r   r   rb   r   r   r   heq,  s    zFakeCUDAFp16.heqc                 C   s   ||kS r   r   rb   r   r   r   hne/  s    zFakeCUDAFp16.hnec                 C   s   ||kS r   r   rb   r   r   r   hge2  s    zFakeCUDAFp16.hgec                 C   s   ||kS r   r   rb   r   r   r   hgt5  s    zFakeCUDAFp16.hgtc                 C   s   ||kS r   r   rb   r   r   r   hle8  s    zFakeCUDAFp16.hlec                 C   s   ||k S r   r   rb   r   r   r   hlt;  s    zFakeCUDAFp16.hltc                 C   s
   t ||S r   )rV   rb   r   r   r   hmax>  s    zFakeCUDAFp16.hmaxc                 C   s
   t ||S r   )rX   rb   r   r   r   hminA  s    zFakeCUDAFp16.hminN)!r   r   r   re   rf   rg   rh   rk   rm   ro   rs   ru   rw   ry   r{   r}   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ra      s<   ra   c                   @   s  e Zd ZdZdd Zedd Zedd Zedd	 Zed
d Z	edd Z
edd Zedd Zedd Zedd Zedd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8S )9FakeCUDAModulea7  
    An instance of this class will be injected into the __globals__ for an
    executing function in order to implement calls to cuda.*. This will fail to
    work correctly if the user code does::

        from numba import cuda as something_else

    In other words, the CUDA module must be called cuda.
    c                 C   s   t | | _t | | _t | _t | _t|| _t	 | _
t | _t | _t D ],\}}t| || |jD ]}t| || qhqNd S r   )r   gridDimblockDimr   _cgr!   _localr,   _sharedr*   _constr?   _atomicra   _fp16r   itemssetattraliases)r   Zgrid_dimZ	block_dimr2   nameZsvtyaliasr   r   r   r   P  s    



zFakeCUDAModule.__init__c                 C   s   | j S r   )r   r   r   r   r   cgc  s    zFakeCUDAModule.cgc                 C   s   | j S r   )r   r   r   r   r   localg  s    zFakeCUDAModule.localc                 C   s   | j S r   )r   r   r   r   r   sharedk  s    zFakeCUDAModule.sharedc                 C   s   | j S r   )r   r   r   r   r   consto  s    zFakeCUDAModule.constc                 C   s   | j S r   )r   r   r   r   r   atomics  s    zFakeCUDAModule.atomicc                 C   s   | j S r   )r   r   r   r   r   fp16w  s    zFakeCUDAModule.fp16c                 C   s
   t  jS r   )r   r   	threadIdxr   r   r   r   r   {  s    zFakeCUDAModule.threadIdxc                 C   s
   t  jS r   )r   r   blockIdxr   r   r   r   r     s    zFakeCUDAModule.blockIdxc                 C   s   dS N    r   r   r   r   r   warpsize  s    zFakeCUDAModule.warpsizec                 C   s   t  jd S r   )r   r   	thread_idr   r   r   r   laneid  s    zFakeCUDAModule.laneidc                 C   s   t    d S r   r   r   r   r   r   r     s    zFakeCUDAModule.syncthreadsc                 C   s   d S r   r   r   r   r   r   threadfence  s    zFakeCUDAModule.threadfencec                 C   s   d S r   r   r   r   r   r   threadfence_block  s    z FakeCUDAModule.threadfence_blockc                 C   s   d S r   r   r   r   r   r   threadfence_system  s    z!FakeCUDAModule.threadfence_systemc                 C   s   t  |S r   )r   r   syncthreads_countr   rC   r   r   r   r     s    z FakeCUDAModule.syncthreads_countc                 C   s   t  |S r   )r   r   syncthreads_andr   r   r   r   r     s    zFakeCUDAModule.syncthreads_andc                 C   s   t  |S r   )r   r   syncthreads_orr   r   r   r   r     s    zFakeCUDAModule.syncthreads_orc                 C   s   t |dS )N1)binr3   r   r   r   r   popc  s    zFakeCUDAModule.popcc                 C   s   || | S r   r   ri   r   r   r   fma  s    zFakeCUDAModule.fmac                 C   s   |d S )NgUUUUUU?r   rl   r   r   r   cbrt  s    zFakeCUDAModule.cbrtc                 C   s   t d|d d d dS )N{:032b}r5   )intformatr   r   r   r   brev  s    zFakeCUDAModule.brevc                 C   s    d |}t|t|d S )Nr   0)r   lenlstrip)r   rC   sr   r   r   clz  s    
zFakeCUDAModule.clzc                 C   s,   d |}t|t|d d d }|S )Nr   r   r   !   )r   r   rstrip)r   rC   r   rr   r   r   ffs  s    
zFakeCUDAModule.ffsc                 C   s   |r|S |S r   r   ri   r   r   r   selp  s    zFakeCUDAModule.selpc                 C   s   | j }| j}| j}|j|j |j }|dkr0|S |j|j |j }|dkrR||fS |j|j |j }|dkrv|||fS td| d S )Nr   r5      z*Global ID has 1-3 dimensions. %d requested)r   r   r   r
   r   r   RuntimeError)r   nbdimbidtidr
   r   r   r   r   r   grid  s    
zFakeCUDAModule.gridc                 C   sn   | j }| j}|j|j }|dkr$|S |j|j }|dkr@||fS |j|j }|dkr^|||fS td| d S )Nr   r5   r   z,Global grid has 1-3 dimensions. %d requested)r   r   r
   r   r   r   )r   r   r   Zgdimr
   r   r   r   r   r   gridsize  s    
zFakeCUDAModule.gridsizeN) r   r   r   r   r   propertyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   E  sL   











r   c              	   #   sl   ddl m  | j}t fdd| D }tfdd| D }|| z
d V  W 5 || X d S )Nr   cudac                 3   s"   | ]\}}| kr||fV  qd S r   r   .0kvr   r   r   	<genexpr>  s      z&swapped_cuda_module.<locals>.<genexpr>c                 3   s   | ]\}}| fV  qd S r   r   r   )fake_cuda_moduler   r   r     s     )Znumbar   __globals__dictr   update)fnr   Zfn_globsorigreplr   )r   r   r   swapped_cuda_module  s    

r   )%r   
contextlibr   r:   r   r8   Z
numba.corer   numpyr%   Znumba.npr   r   objectr   r   r   r!   r*   r,   Lockr@   rF   rH   rJ   rL   rU   rW   r[   r_   rO   rQ   rS   r?   ra   r   r   r   r   r   r   <module>   s@   
/_\ 