U
    -ea)                     @   s   d dl mZ d dlZd dlZd dlZd dlZddlmZm	Z	 ddl
mZmZmZ ddlmZ ddlmZmZ daed	d
 Zdd ZG dd dZG dd deZG dd deZG dd dejZG dd deZdS )    )contextmanagerN   )FakeCUDAArrayFakeWithinKernelCUDAArray)Dim3FakeCUDAModuleswapped_cuda_module   )normalize_kernel_dimensions)wrap_argArgHintc                 c   s*   t dkstd| a z
dV  W 5 da X dS )z*
    Push the current kernel context.
    Nz)concurrent simulated kernel not supported)_kernel_contextAssertionError)mod r   \/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/numba/cuda/simulator/kernel.py_push_kernel_context   s
    
r   c                   C   s   t S )zT
    Get the current kernel context. This is usually done by a device function.
    )r   r   r   r   r   _get_kernel_context$   s    r   c                   @   s   e Zd ZdZdd ZdS )FakeOverloadzE
    Used only to provide the max_cooperative_grid_blocks method
    c                 C   s   dS )Nr   r   )selfZblockdimr   r   r   max_cooperative_grid_blocks/   s    z(FakeOverload.max_cooperative_grid_blocksN)__name__
__module____qualname____doc__r   r   r   r   r   r   +   s   r   c                   @   s   e Zd Zdd ZdS )FakeOverloadDictc                 C   s   t  S N)r   )r   keyr   r   r   __getitem__6   s    zFakeOverloadDict.__getitem__N)r   r   r   r   r   r   r   r   r   5   s   r   c                   @   sb   e Zd ZdZdg dfddZdd Zdd Zd	d
 Zdd ZdddZ	e
dd Ze
dd ZdS )FakeCUDAKernelz(
    Wraps a @cuda.jit-ed function.
    Fc                 C   sJ   || _ || _|| _|| _t|| _d | _d | _d| _d| _	t
| | d S )Nr   )fn_deviceZ	_fastmath_debuglist
extensionsgrid_dim	block_dimstreamdynshared_size	functoolsupdate_wrapper)r   r    ZdeviceZfastmathr$   debugr   r   r   __init__A   s    
zFakeCUDAKernel.__init__c           	   
      s   j r2tjt  j| W  5 Q R  S Q R X tjj\}}t||j}t	| g fdd  fdd|D }tj|8 t
j| D ]&}tj||j}|j|f|  qW 5 Q R X D ]
}|  qW 5 Q R X d S )Nc                    s   t  fddjd | f\}} t| tjrF| jdkrFt|  }n0t| t	r\|  }nt| tj
rrt| }n| }t|trt|S |S )Nc                    s   |j | d dS )Nr   )r'   retr)Zprepare_args)Zty_val	extension)r-   r   r   <lambda>b   s   z;FakeCUDAKernel.__call__.<locals>.fake_arg.<locals>.<lambda>r   )r)   reducer$   
isinstancenpZndarrayndimr   Z	to_devicer   voidr   r   )arg_ret)r-   r   r   r   fake_arg_   s    
	


z)FakeCUDAKernel.__call__.<locals>.fake_argc                    s   g | ]} |qS r   r   ).0r5   )r8   r   r   
<listcomp>v   s     z+FakeCUDAKernel.__call__.<locals>.<listcomp>)r!   r   r    r   r
   r%   r&   r   r(   r   r2   ndindexBlockManagerr"   run)	r   argsr%   r&   Zfake_cuda_moduleZ	fake_args
grid_pointZbmwbr   )r8   r-   r   r   __call__O   s&    
zFakeCUDAKernel.__call__c                 C   s2   t |d d  \| _| _t|dkr.|d | _| S )Nr	         )r
   r%   r&   lenr(   )r   configurationr   r   r   r      s
    

zFakeCUDAKernel.__getitem__c                 C   s   d S r   r   r   r   r   r   bind   s    zFakeCUDAKernel.bindc                 G   s   | S r   r   )r   r>   r   r   r   
specialize   s    zFakeCUDAKernel.specializer   c                 C   s$   |dk rt d| | |d||f S )Nr   z0Can't create ForAll with negative task count: %sr   )
ValueError)r   ZntasksZtpbr'   Z	sharedmemr   r   r   forall   s
    zFakeCUDAKernel.forallc                 C   s   t  S r   )r   rF   r   r   r   	overloads   s    zFakeCUDAKernel.overloadsc                 C   s   | j S r   )r    rF   r   r   r   py_func   s    zFakeCUDAKernel.py_funcN)r   r   r   )r   r   r   r   r,   rA   r   rG   rH   rJ   propertyrK   rL   r   r   r   r   r   <   s   1	

r   c                       sT   e Zd ZdZ fddZ fddZdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )BlockThreadzG
    Manages the execution of a function for a single CUDA thread.
    c           	         s   |r fdd}|}n }t t| j|d t | _d| _|| _t| | _	t| | _
d | _d| _d| _|| _t| jj }| j
j|j| j
j|j| j
j    | _d S )Nc                     s   t jdd  | | d S )Nraise)divide)r2   Zseterr)r>   kwargsfr   r   debug_wrapper   s    z+BlockThread.__init__.<locals>.debug_wrapper)targetFT)superrN   r,   	threadingEventsyncthreads_eventsyncthreads_blocked_managerr   blockIdx	threadIdx	exceptiondaemonabortr+   
_block_dimxyz	thread_id)	r   rS   managerr\   r]   r+   rT   rU   ZblockDim	__class__rR   r   r,      s(    


zBlockThread.__init__c              
      s   zt t|   W n tk
r } zfdt| j }dt| j }t|dkrZd||f }nd|||f }t	 d }t
|||f| _W 5 d }~X Y nX d S )Nztid=%szctaid=%s z%s %sz	%s %s: %sr	   )rV   rN   r=   	Exceptionr#   r]   r\   strsysexc_infotyper^   )r   etidZctaidmsgtbrg   r   r   r=      s    zBlockThread.runc                 C   s:   | j rtdd| _| j  | j  | j r6tdd S )Nz"abort flag set on syncthreads callTz#abort flag set on syncthreads clear)r`   RuntimeErrorrZ   rY   waitclearrF   r   r   r   syncthreads   s    

zBlockThread.syncthreadsc                 C   sD   | j j| j j| j jf}|| jj|< |   t| jj}|   |S r   )	r]   rb   rc   rd   r[   block_staterv   r2   Zcount_nonzero)r   valueidxcountr   r   r   syncthreads_count   s    zBlockThread.syncthreads_countc                 C   sL   | j j| j j| j jf}|| jj|< |   t| jj}|   |rHdS dS Nr   r   )	r]   rb   rc   rd   r[   rw   rv   r2   allr   rx   ry   testr   r   r   syncthreads_and   s    zBlockThread.syncthreads_andc                 C   sL   | j j| j j| j jf}|| jj|< |   t| jj}|   |rHdS dS r|   )	r]   rb   rc   rd   r[   rw   rv   r2   anyr~   r   r   r   syncthreads_or   s    zBlockThread.syncthreads_orc                 C   s   d| j | jf S )NzThread <<<%s, %s>>>)r\   r]   rF   r   r   r   __str__   s    zBlockThread.__str__)r   r   r   r   r,   r=   rv   r{   r   r   r   __classcell__r   r   rg   r   rN      s   rN   c                   @   s    e Zd ZdZdd Zdd ZdS )r<   a  
    Manages the execution of a thread block.

    When run() is called, all threads are started. Each thread executes until it
    hits syncthreads(), at which point it sets its own syncthreads_blocked to
    True so that the BlockManager knows it is blocked. It then waits on its
    syncthreads_event.

    The BlockManager polls threads to determine if they are blocked in
    syncthreads(). If it finds a blocked thread, it adds it to the set of
    blocked threads. When all threads are blocked, it unblocks all the threads.
    The thread are unblocked by setting their syncthreads_blocked back to False
    and setting their syncthreads_event.

    The polling continues until no threads are alive, when execution is
    complete.
    c                 C   s.   || _ || _|| _|| _tj|tjd| _d S )N)Zdtype)Z	_grid_dimra   _fr"   r2   ZzerosZbool_rw   )r   rS   r%   r&   r+   r   r   r   r,     s
    zBlockManager.__init__c           
         s"  t  }t  }t  }tjj D ]@} fdd}t|||j}|  || || q|r|D ]R}|jr~|| qh|j	rh|D ]}	d|	_
d|	_|	j   q|j	d |j	d qh||kr|D ]}d|_|j   qt  }t dd |D }q`|D ] }|j	r|j	d |j	d qd S )	Nc                      s   j    d S r   )r   r   r>   r   r   r   rU     s    z BlockManager.run.<locals>.targetTFr   r   c                 S   s   g | ]}|  r|qS r   )is_alive)r9   tr   r   r   r:   /  s      z$BlockManager.run.<locals>.<listcomp>)setr2   r;   ra   rN   r"   startaddrZ   r^   r`   rY   with_traceback)
r   r?   r>   threadsZlivethreadsZblockedthreadsZblock_pointrU   r   Zt_otherr   r   r   r=     s8    
zBlockManager.runN)r   r   r   r   r,   r=   r   r   r   r   r<      s   r<   )
contextlibr   r)   rl   rW   numpyr2   Zcudadrv.devicearrayr   r   Z	kernelapir   r   r   errorsr
   r>   r   r   r   r   r   r   dictr   objectr   ThreadrN   r<   r   r   r   r   <module>   s"   

dS