U
    ,d6)                     @   s   d dl mZ d dlmZ d dlZd dlZd dlZddlm	Z	m
Z
 ddlmZmZmZ ddlmZ dd	lmZmZ daed
d Zdd ZG dd dZG dd deZG dd deZG dd dejZG dd deZdS )    )contextmanager)reduceN   )FakeCUDAArrayFakeWithinKernelCUDAArray)Dim3FakeCUDAModuleswapped_cuda_module   )normalize_kernel_dimensions)wrap_argArgHintc                 c   s*   t dkstd| a z
dV  W 5 da X dS )z*
    Push the current kernel context.
    Nz'conrrent simulated kernel not supported)_kernel_contextAssertionError)mod r   ?/tmp/pip-unpacked-wheel-eu7e0c37/numba/cuda/simulator/kernel.py_push_kernel_context   s
    
r   c                   C   s   t S )zT
    Get the current kernel context. This is usually done by a device function.
    )r   r   r   r   r   _get_kernel_context$   s    r   c                   @   s   e Zd ZdZdd ZdS )FakeOverloadzE
    Used only to provide the max_cooperative_grid_blocks method
    c                 C   s   dS )Nr   r   )selfZblockdimr   r   r   max_cooperative_grid_blocks/   s    z(FakeOverload.max_cooperative_grid_blocksN)__name__
__module____qualname____doc__r   r   r   r   r   r   +   s   r   c                   @   s   e Zd Zdd ZdS )FakeOverloadDictc                 C   s   t  S N)r   )r   keyr   r   r   __getitem__6   s    zFakeOverloadDict.__getitem__N)r   r   r   r   r   r   r   r   r   5   s   r   c                   @   sb   e Zd ZdZdg dfddZdd Zdd Zd	d
 Zdd ZdddZ	e
dd Ze
dd ZdS )FakeCUDAKernelz(
    Wraps a @cuda.jit-ed function.
    Fc                 C   s>   || _ || _|| _|| _t|| _d | _d | _d| _d| _	d S )Nr   )
fn_deviceZ	_fastmath_debuglist
extensionsgrid_dim	block_dimstreamdynshared_size)r   r!   ZdeviceZfastmathr%   debugr   r   r   __init__A   s    
zFakeCUDAKernel.__init__c           	   
      s   j r2tjt  j| W  5 Q R  S Q R X tjj\}}t||j}t	| g fdd  fdd|D }tj|8 t
j| D ]&}tj||j}|j|f|  qW 5 Q R X D ]
}|  qW 5 Q R X d S )Nc                    s   t  fddjd | f\}} t| tjrD| jdkrDt|  }n0t| trZ|  }nt| tj	rpt
| }n| }t|t
rt|S |S )Nc                    s   |j | d dS )Nr   )r(   retr)Zprepare_args)Zty_val	extension)r,   r   r   <lambda>a   s   z;FakeCUDAKernel.__call__.<locals>.fake_arg.<locals>.<lambda>r   )r   r%   
isinstancenpZndarrayndimr   Z	to_devicer   voidr   r   )arg_ret)r,   r   r   r   fake_arg^   s    
	


z)FakeCUDAKernel.__call__.<locals>.fake_argc                    s   g | ]} |qS r   r   ).0r3   )r6   r   r   
<listcomp>u   s     z+FakeCUDAKernel.__call__.<locals>.<listcomp>)r"   r	   r!   r   r   r&   r'   r   r)   r   r0   ndindexBlockManagerr#   run)	r   argsr&   r'   Zfake_cuda_moduleZ	fake_args
grid_pointZbmwbr   )r6   r,   r   r   __call__N   s&    
zFakeCUDAKernel.__call__c                 C   s2   t |d d  \| _| _t|dkr.|d | _| S )Nr
         )r   r&   r'   lenr)   )r   configurationr   r   r   r      s
    

zFakeCUDAKernel.__getitem__c                 C   s   d S r   r   r   r   r   r   bind   s    zFakeCUDAKernel.bindc                 G   s   | S r   r   )r   r<   r   r   r   
specialize   s    zFakeCUDAKernel.specializer   c                 C   s$   |dk rt d| | |d||f S )Nr   z0Can't create ForAll with negative task count: %sr   )
ValueError)r   ZntasksZtpbr(   Z	sharedmemr   r   r   forall   s
    zFakeCUDAKernel.forallc                 C   s   t  S r   )r   rD   r   r   r   	overloads   s    zFakeCUDAKernel.overloadsc                 C   s   | j S r   )r!   rD   r   r   r   py_func   s    zFakeCUDAKernel.py_funcN)r   r   r   )r   r   r   r   r+   r?   r   rE   rF   rH   propertyrI   rJ   r   r   r   r   r    <   s   1	

r    c                       sT   e Zd ZdZ fddZ fddZdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )BlockThreadzG
    Manages the execution of a function for a single CUDA thread.
    c           	         s   |r fdd}|}n }t t| j|d t | _d| _|| _t| | _	t| | _
d | _d| _d| _|| _t| jj }| j
j|j| j
j|j| j
j    | _d S )Nc                     s   t jdd  | | d S )Nraise)divide)r0   Zseterr)r<   kwargsfr   r   debug_wrapper   s    z+BlockThread.__init__.<locals>.debug_wrapper)targetFT)superrL   r+   	threadingEventsyncthreads_eventsyncthreads_blocked_managerr   blockIdx	threadIdx	exceptiondaemonabortr*   
_block_dimxyzZ	thread_id)	r   rQ   managerrZ   r[   r*   rR   rS   ZblockDim	__class__rP   r   r+      s(    


zBlockThread.__init__c              
      s   zt t|   W n tk
r } zfdt| j }dt| j }t|dkrZd||f }nd|||f }t	 d }t
|||f| _W 5 d }~X Y nX d S )Nztid=%szctaid=%s z%s %sz	%s %s: %sr
   )rT   rL   r;   	Exceptionr$   r[   rZ   strsysexc_infotyper\   )r   etidZctaidmsgtbrd   r   r   r;      s    zBlockThread.runc                 C   s:   | j rtdd| _| j  | j  | j r6tdd S )Nz"abort flag set on syncthreads callTz#abort flag set on syncthreads clear)r^   RuntimeErrorrX   rW   waitclearrD   r   r   r   syncthreads   s    

zBlockThread.syncthreadsc                 C   sD   | j j| j j| j jf}|| jj|< |   t| jj}|   |S r   )	r[   r`   ra   rb   rY   block_staters   r0   Zcount_nonzero)r   valueidxcountr   r   r   syncthreads_count   s    zBlockThread.syncthreads_countc                 C   sL   | j j| j j| j jf}|| jj|< |   t| jj}|   |rHdS dS Nr   r   )	r[   r`   ra   rb   rY   rt   rs   r0   allr   ru   rv   testr   r   r   syncthreads_and   s    zBlockThread.syncthreads_andc                 C   sL   | j j| j j| j jf}|| jj|< |   t| jj}|   |rHdS dS ry   )	r[   r`   ra   rb   rY   rt   rs   r0   anyr{   r   r   r   syncthreads_or   s    zBlockThread.syncthreads_orc                 C   s   d| j | jf S )NzThread <<<%s, %s>>>)rZ   r[   rD   r   r   r   __str__   s    zBlockThread.__str__)r   r   r   r   r+   r;   rs   rx   r}   r   r   __classcell__r   r   rd   r   rL      s   rL   c                   @   s    e Zd ZdZdd Zdd ZdS )r:   a  
    Manages the execution of a thread block.

    When run() is called, all threads are started. Each thread executes until it
    hits syncthreads(), at which point it sets its own syncthreads_blocked to
    True so that the BlockManager knows it is blocked. It then waits on its
    syncthreads_event.

    The BlockManager polls threads to determine if they are blocked in
    syncthreads(). If it finds a blocked thread, it adds it to the set of
    blocked threads. When all threads are blocked, it unblocks all the threads.
    The thread are unblocked by setting their syncthreads_blocked back to False
    and setting their syncthreads_event.

    The polling continues until no threads are alive, when execution is
    complete.
    c                 C   s.   || _ || _|| _|| _tj|tjd| _d S )N)Zdtype)Z	_grid_dimr_   _fr#   r0   zerosZbool_rt   )r   rQ   r&   r'   r*   r   r   r   r+     s
    zBlockManager.__init__c           
         s"  t  }t  }t  }tjj D ]@} fdd}t|||j}|  || || q|r|D ]R}|jr~|| qh|j	rh|D ]}	d|	_
d|	_|	j   q|j	d |j	d qh||kr|D ]}d|_|j   qt  }t dd |D }q`|D ] }|j	r|j	d |j	d qd S )	Nc                      s   j    d S r   )r   r   r<   r   r   r   rS     s    z BlockManager.run.<locals>.targetTFr   r   c                 S   s   g | ]}|  r|qS r   )is_alive)r7   tr   r   r   r8   .  s      z$BlockManager.run.<locals>.<listcomp>)setr0   r9   r_   rL   r#   startaddrX   r\   r^   rW   with_traceback)
r   r=   r<   threadsZlivethreadsZblockedthreadsZblock_pointrS   r   Zt_otherr   r   r   r;     s8    
zBlockManager.runN)r   r   r   r   r+   r;   r   r   r   r   r:      s   r:   )
contextlibr   	functoolsr   ri   rU   Znumpyr0   Zcudadrv.devicearrayr   r   Z	kernelapir   r   r	   errorsr   r<   r   r   r   r   r   r   dictr   objectr    ThreadrL   r:   r   r   r   r   <module>   s"   

cS