U
    9%e"                     @   s   d dl mZ d dlmZ d dlmZ d dlmZm	Z	m
Z
 G dd deZG dd de
ZG d	d
 d
e	ZG dd deZdZG dd dejZdZG dd dejZdS )    )cuda)array)deviceufunc)UFuncMechanismGeneralizedUFuncGUFuncCallStepsc                   @   s2   e Zd ZdZdd Zdd ZdddZd	d
 ZdS )CUDAUFuncDispatcherzD
    Invoke the CUDA ufunc specialization for the given inputs.
    c                 C   s   || _ |j| _d S N)	functions__name__)selfZtypes_to_retty_kernelspyfunc r   U/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/numba/cuda/vectorizers.py__init__   s    zCUDAUFuncDispatcher.__init__c                 O   s   t | j||S )a  
        *args: numpy arrays or DeviceArrayBase (created by cuda.to_device).
               Cannot mix the two types in one call.

        **kws:
            stream -- cuda stream; when defined, asynchronous mode is used.
            out    -- output array. Can be a numpy array or DeviceArrayBase
                      depending on the input arguments.  Type must match
                      the input arguments.
        )CUDAUFuncMechanismcallr
   )r   argskwsr   r   r   __call__   s    zCUDAUFuncDispatcher.__call__r   c              	   C   s   t t| j d dks"td|jdks4td|jd }g }|dkrTtdn|dkrd|d S |pnt	 }|
 P tjj|r|}nt||}| |||}td|jd}|j||d	 W 5 Q R X |d S )
Nr      zmust be a binary ufunc   zmust use 1d arrayzReduction on an empty array.)r   )dtypestream)lenlistr
   keysAssertionErrorndimshape	TypeErrorr   r   Zauto_synchronizecudadrvdevicearrayis_cuda_ndarray	to_device_CUDAUFuncDispatcher__reducenp_arrayr   copy_to_host)r   argr   ngpu_memsmemoutbufr   r   r   reduce   s"    "


zCUDAUFuncDispatcher.reducec           
      C   s   |j d }|d dkrd||d \}}|| || | |||}|| | ||||dS ||d \}}	|| ||	 | ||	||d |d dkr| |||S |S d S )Nr   r   r   )r-   r   )r    splitappendr&   )
r   r,   r+   r   r*   ZfatcutZthincutr-   leftrightr   r   r   Z__reduce;   s    





zCUDAUFuncDispatcher.__reduceN)r   )r   
__module____qualname____doc__r   r   r/   r&   r   r   r   r   r      s
   
r   c                       sR   e Zd ZdgZ fddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )_CUDAGUFuncCallSteps_streamc                    s$   t  |||| |dd| _d S )Nr   r   )superr   getr8   )r   ZninZnoutr   kwargs	__class__r   r   r   X   s    z_CUDAGUFuncCallSteps.__init__c                 C   s
   t |S r	   r   Zis_cuda_arrayr   objr   r   r   is_device_array\   s    z$_CUDAGUFuncCallSteps.is_device_arrayc                 C   s   t jj|r|S t |S r	   r   r"   r#   r$   Zas_cuda_arrayr?   r   r   r   as_device_array_   s    z$_CUDAGUFuncCallSteps.as_device_arrayc                 C   s   t j|| jdS Nr   )r   r%   r8   )r   hostaryr   r   r   r%   i   s    z_CUDAGUFuncCallSteps.to_devicec                 C   s   |j || jd}|S rD   )r(   r8   )r   devaryrE   r-   r   r   r   to_hostl   s    z_CUDAGUFuncCallSteps.to_hostc                 C   s   t j||| jdS N)r    r   r   )r   device_arrayr8   )r   r    r   r   r   r   allocate_device_arrayp   s    z*_CUDAGUFuncCallSteps.allocate_device_arrayc                 C   s   |j || jd|  d S rD   )forallr8   )r   ZkernelZnelemr   r   r   r   launch_kernels   s    z"_CUDAGUFuncCallSteps.launch_kernel)r   r4   r5   	__slots__r   rA   rC   r%   rG   rJ   rL   __classcell__r   r   r<   r   r7   S   s   
r7   c                       s8   e Zd Z fddZedd Zdd Zdd Z  ZS )	CUDAGeneralizedUFuncc                    s   |j | _ t || d S r	   )r   r9   r   )r   	kernelmapenginer   r<   r   r   r   x   s    zCUDAGeneralizedUFunc.__init__c                 C   s   t S r	   )r7   r   r   r   r   _call_steps|   s    z CUDAGeneralizedUFunc._call_stepsc                 C   s   t jjj|d|j|jdS N)r   r    stridesr   gpu_data)r   r"   r#   DeviceNDArrayr   rW   )r   aryr    r   r   r   _broadcast_scalar_input   s
    
z,CUDAGeneralizedUFunc._broadcast_scalar_inputc                 C   s:   t |t |j }d| |j }tjjj|||j|jdS rT   )	r   r    rV   r   r"   r#   rX   r   rW   )r   rY   ZnewshapeZnewaxZ
newstridesr   r   r   _broadcast_add_axis   s    
z(CUDAGeneralizedUFunc._broadcast_add_axis)	r   r4   r5   r   propertyrS   rZ   r[   rN   r   r   r<   r   rO   w   s
   
rO   c                   @   sL   e Zd ZdZdZdd Zdd Zdd Zd	d
 Zdd Z	dd Z
dd ZdS )r   z%
    Provide CUDA specialization
    r   c                 C   s   |j ||d|  d S rD   )rK   )r   funccountr   r   r   r   r   launch   s    zCUDAUFuncMechanism.launchc                 C   s
   t |S r	   r>   r?   r   r   r   rA      s    z"CUDAUFuncMechanism.is_device_arrayc                 C   s   t jj|r|S t |S r	   rB   r?   r   r   r   rC      s    z"CUDAUFuncMechanism.as_device_arrayc                 C   s   t j||dS rD   )r   r%   )r   rE   r   r   r   r   r%      s    zCUDAUFuncMechanism.to_devicec                 C   s   |j |dS rD   )r(   )r   rF   r   r   r   r   rG      s    zCUDAUFuncMechanism.to_hostc                 C   s   t j|||dS rH   )r   rI   )r   r    r   r   r   r   r   rJ      s    z(CUDAUFuncMechanism.allocate_device_arrayc                    sn    fddt tD }tt j }dg| t j }|D ]}d||< qFtjjj| j	 j
dS )Nc                    s,   g | ]$}| j ks$ j| | kr|qS r   )r   r    ).0axrY   r    r   r   
<listcomp>   s    
z7CUDAUFuncMechanism.broadcast_device.<locals>.<listcomp>r   rU   )ranger   r    r   rV   r   r"   r#   rX   r   rW   )r   rY   r    Z
ax_differsZ
missingdimrV   ra   r   rb   r   broadcast_device   s    

z#CUDAUFuncMechanism.broadcast_deviceN)r   r4   r5   r6   ZDEFAULT_STREAMr_   rA   rC   r%   rG   rJ   re   r   r   r   r   r      s   
r   z
def __vectorized_{name}({args}, __out__):
    __tid__ = __cuda__.grid(1)
    if __tid__ < __out__.shape[0]:
        __out__[__tid__] = __core__({argitems})
c                   @   s8   e Zd Zdd Zdd Zdd Zdd Zed	d
 ZdS )CUDAVectorizec                 C   s*   t j|ddd| j}||j|j jjfS )NT)deviceinline)r   jitr   Z	overloadsr   	signaturereturn_type)r   sigZcudevfnr   r   r   _compile_core   s    zCUDAVectorize._compile_corec                 C   s    | j j }|t|d |S )NZ__cuda__Z__core__)r   __globals__copyupdater   )r   corefnZglblr   r   r   _get_globals   s
    zCUDAVectorize._get_globalsc                 C   s
   t |S r	   r   ri   r   Zfnobjrl   r   r   r   _compile_kernel   s    zCUDAVectorize._compile_kernelc                 C   s   t | j| jS r	   )r   rP   r   rR   r   r   r   build_ufunc   s    zCUDAVectorize.build_ufuncc                 C   s   t S r	   )vectorizer_stager_sourcerR   r   r   r   _kernel_template   s    zCUDAVectorize._kernel_templateN)	r   r4   r5   rm   rs   rv   rw   r\   ry   r   r   r   r   rf      s   rf   zy
def __gufunc_{name}({args}):
    __tid__ = __cuda__.grid(1)
    if __tid__ < {checkedarg}:
        __core__({argitems})
c                   @   s0   e Zd Zdd Zdd Zedd Zdd Zd	S )
CUDAGUFuncVectorizec                 C   s"   t | j| j}t| j|| jdS )N)rP   rQ   r   )r   ZGUFuncEngineZinputsigZ	outputsigrO   rP   r   )r   rQ   r   r   r   rw      s
    zCUDAGUFuncVectorize.build_ufuncc                 C   s   t ||S r	   rt   ru   r   r   r   rv      s    z#CUDAGUFuncVectorize._compile_kernelc                 C   s   t S r	   )_gufunc_stager_sourcerR   r   r   r   ry      s    z$CUDAGUFuncVectorize._kernel_templatec                 C   s4   t j|dd| j}| jj }|t |d |S )NT)rg   rn   )r   ri   r   Zpy_funcro   rp   rq   )r   rl   rr   Zglblsr   r   r   rs      s    z CUDAGUFuncVectorize._get_globalsN)r   r4   r5   rw   rv   r\   ry   rs   r   r   r   r   rz      s
   
rz   N)Znumbar   numpyr   r'   Znumba.np.ufuncr   Znumba.np.ufunc.deviceufuncr   r   r   objectr   r7   rO   r   rx   ZDeviceVectorizerf   r{   ZDeviceGUFuncVectorizerz   r   r   r   r   <module>   s   K$0