U
    9%e4]                     @   s  d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZmZ ddlZddlmZ ddlmZmZmZ ddlmZmZmZ ddlmZmZ eeZdZdZd	Z d
Z!dZ"eZ#eZ$d% Z&e'e&D ]\Z(Z)e*ej+e e)e( qdZ,dZ-dd Z.e/ Z0G dd de1Z2G dd de1Z3dZ4ddddddddddd
Z5dd Z6dd Z7dd  Z8d!d" Z9d#Z:G d$d% d%e1Z;d&Z<d'Z=d(Z>d)Z?d*Z@d+d, ZAd-d. ZBd/d0 ZCd1d2 ZDd3d4 ZEd5d6 ZFd7d8 ZGeHd9ZId:d; ZJd<d= ZKd>d? ZLdS )@z(
This is a direct translation of nvvm.h
    N)c_void_pc_intPOINTERc_char_pc_size_tbyrefc_char)ir   )	NvvmErrorNvvmSupportErrorNvvmWarning)get_libdeviceopen_libdeviceopen_cudalib)cgutilsconfig         a  
NVVM_SUCCESS
NVVM_ERROR_OUT_OF_MEMORY
NVVM_ERROR_PROGRAM_CREATION_FAILURE
NVVM_ERROR_IR_VERSION_MISMATCH
NVVM_ERROR_INVALID_INPUT
NVVM_ERROR_INVALID_PROGRAM
NVVM_ERROR_INVALID_IR
NVVM_ERROR_INVALID_OPTION
NVVM_ERROR_NO_MODULE_IN_PROGRAM
NVVM_ERROR_COMPILATION
ze-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64ze-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64c                   C   s*   z
t   W n tk
r    Y dS X dS dS )z(
    Return if libNVVM is available
    FTN)NVVMr    r   r   V/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/numba/cuda/cudadrv/nvvm.pyis_available<   s
    
r   c                   @   s   e Zd ZdZeeeeefeeefeeefeeee	efeeee	efeeeeefeeee	feeefeeee	feeefeeeeeeeeefeeeeefdZ
dZdd Zdd Zedd	 Zed
d Zdd Zdd ZdddZdS )r   zProcess-wide singleton.
    )nvvmVersionnvvmCreateProgramnvvmDestroyProgramnvvmAddModuleToProgramnvvmLazyAddModuleToProgramnvvmCompileProgramnvvmGetCompiledResultSizenvvmGetCompiledResultnvvmGetProgramLogSizenvvmGetProgramLognvvmIRVersionnvvmVerifyProgramNc                 C   s   t  | jd krt|  | _}ztd|_W n8 tk
rf } zd | _d}t|| W 5 d }~X Y nX |j	 D ]8\}}t
|j|}|d |_|dd  |_t||| qrW 5 Q R X | jS )NZnvvmz;libNVVM cannot be found. Do `conda install cudatoolkit`:
%sr   r
   )
_nvvm_lock_NVVM__INSTANCEobject__new__r   driverOSErrorr   _PROTOTYPESitemsgetattrrestypeZargtypessetattr)clsinsteerrmsgnameprotofuncr   r   r   r)      s    

zNVVM.__new__c                 C   s<   |   }|d | _|d | _|d | _|d | _t | _d S )Nr   r
      r   )get_ir_version_majorIR_minorIRZ	_majorDbgZ	_minorDbgget_supported_ccs_supported_ccs)selfir_versionsr   r   r   __init__   s    



zNVVM.__init__c                 C   s   | j | jfdk rtS tS d S )N)r
      )r:   r;   _datalayout_original_datalayout_i128r>   r   r   r   data_layout   s    zNVVM.data_layoutc                 C   s   | j S N)r=   rD   r   r   r   supported_ccs   s    zNVVM.supported_ccsc                 C   s8   t  }t  }| t|t|}| |d |j|jfS )NzFailed to get version.)r   r   r   check_errorvalue)r>   majorminorerrr   r   r   get_version   s
    zNVVM.get_versionc                 C   sX   t  }t  }t  }t  }| t|t|t|t|}| |d |j|j|j|jfS )NzFailed to get IR version.)r   r$   r   rH   rI   )r>   ZmajorIRZminorIRZmajorDbgZminorDbgrL   r   r   r   r9      s     zNVVM.get_ir_versionFc                 C   s2   |r.t |t| }|r*t| td n|d S )Nr
   )r   RESULT_CODE_NAMESprintsysexit)r>   errormsgrQ   excr   r   r   rH      s    zNVVM.check_error)F)__name__
__module____qualname____doc__nvvm_resultr   r   nvvm_programr   r   r,   r'   r)   r@   propertyrE   rG   rM   r9   rH   r   r   r   r   r   K   s\   

              6


r   c                   @   sD   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dS )CompilationUnitc                 C   s4   t  | _t | _| jt| j}| j|d d S )NzFailed to create CU)r   r*   rZ   _handler   r   rH   )r>   rL   r   r   r   r@      s    zCompilationUnit.__init__c                 C   s*   t  }|t| j}|j|ddd d S )NzFailed to destroy CUT)rQ   )r   r   r   r]   rH   )r>   r*   rL   r   r   r   __del__   s    zCompilationUnit.__del__c                 C   s*   | j | j|t|d}| j |d dS )z
         Add a module level NVVM IR to a compilation unit.
         - The buffer should contain an NVVM module IR either in the bitcode
           representation (LLVM3.0) or in the text representation.
        NFailed to add module)r*   r   r]   lenrH   r>   bufferrL   r   r   r   
add_module   s
     zCompilationUnit.add_modulec                 C   s*   | j | j|t|d}| j |d dS )z
        Lazily add an NVVM IR module to a compilation unit.
        The buffer should contain NVVM module IR either in the bitcode
        representation or in the text representation.
        Nr_   )r*   r   r]   r`   rH   ra   r   r   r   lazy_add_module   s
     zCompilationUnit.lazy_add_modulec                 K   s  g }d|kr | d|d  |dr>| d|d  d}|D ]8}||krFtt||}| d|dd|f  qF|rd	tt|	 }t
d
|tt| dd |D  }| j| jt||}| |d | j| jt||}| |d t }	| j| jt|	}| |d t|	j  }
| j| j|
}| |d |  | _| jrttj| jtd |
dd S )aj  Perform Compilation

        The valid compiler options are

         *   - -opt=
         *     - 0 (disable optimizations)
         *     - 3 (default, enable optimizations)
         *   - -arch=
         *     - compute_XX where XX is in (35, 37, 50, 52, 53, 60, 61, 62, 70,
         *                                  72, 75, 80, 86, 89, 90).
         *       The default is compute_52.
         *   - -ftz=
         *     - 0 (default, preserve denormal values, when performing
         *          single-precision floating-point operations)
         *     - 1 (flush denormal values to zero, when performing
         *          single-precision floating-point operations)
         *   - -prec-sqrt=
         *     - 0 (use a faster approximation for single-precision
         *          floating-point square root)
         *     - 1 (default, use IEEE round-to-nearest mode for
         *          single-precision floating-point square root)
         *   - -prec-div=
         *     - 0 (use a faster approximation for single-precision
         *          floating-point division and reciprocals)
         *     - 1 (default, use IEEE round-to-nearest mode for
         *          single-precision floating-point division and reciprocals)
         *   - -fma=
         *     - 0 (disable FMA contraction)
         *     - 1 (default, enable FMA contraction)
         *
         optz-opt=%darchz-arch=%s)ftz	prec_sqrtprec_divfmaz-%s=%d_-z, zunsupported option {0}c                 S   s   g | ]}t |d qS )utf8)r   encode).0xr   r   r   
<listcomp>$  s   z+CompilationUnit.compile.<locals>.<listcomp>zFailed to verify
zFailed to compile
z&Failed to get size of compiled result.zFailed to get compiled result.)categoryN)appendpopgetintboolreplacejoinmapreprkeysr   formatr   r`   r*   r%   r]   
_try_errorr   r   r    r   r   rI   r!   get_loglogwarningswarnr   )r>   optionsoptsZother_optionskvZoptstrZc_optsrL   reslenZptxbufr   r   r   compile   s<    "

zCompilationUnit.compilec                 C   s   | j |d||  f  d S )Nz%s
%s)r*   rH   r   )r>   rL   rS   r   r   r   r~   ?  s    zCompilationUnit._try_errorc                 C   sl   t  }| j| jt|}| j|d |jdkrht|j  }| j| j|}| j|d |j	dS dS )Nz#Failed to get compilation log size.r
   zFailed to get compilation log.rm    )
r   r*   r"   r]   r   rH   rI   r   r#   decode)r>   r   rL   Zlogbufr   r   r   r   B  s    
zCompilationUnit.get_logN)
rU   rV   rW   r@   r^   rc   rd   r   r~   r   r   r   r   r   r\      s   

Vr\   )r   r   )r      r   r   )r   r8   )r   r   )   r   )r   r
   )r   r8   )r   r   )r   r8   )r   r   )rA   r   rA   r   rA   r   )rA   	   r   r   )r   r   )r   r   )r   r   )r   r   )
)   r8   )r   r   )r   r   )r   r   )r   r   )r   r   )r   rA   )   r   )r   r
   )r   r8   c                    sR   z&t |  \ t fddtD W S  tk
rL   tdd tD  Y S X d S )Nc                    s(   g | ] }|  kr krn q|qS r   r   ro   ccZmax_ccZmin_ccr   r   rq   m  s
     
 z(ccs_supported_by_ctk.<locals>.<listcomp>c                 S   s   g | ]}|t jkr|qS r   )r   ZCUDA_DEFAULT_PTX_CCr   r   r   r   rq   r  s    
)CTK_SUPPORTEDtupleCOMPUTE_CAPABILITIESKeyError)Zctk_versionr   r   r   ccs_supported_by_ctki  s
    r   c                  C   s   zddl m}  |  }W n   d}| Y S X tt}||k rd}|d  d|d  }d| d|d  d|d  d}t| |S t|}|S )	Nr   )runtimer   .r
   zCUDA Toolkit z is unsupported by Numba - z! is the minimum required version.)Znumba.cuda.cudadrv.runtimer   rM   minr   r   r   r   )r   Zcudart_versionZ_supported_ccZ
min_cudartZctk_verZunsupported_verr   r   r   r<   v  s    
 
r<   c                 C   sx   t  j}|sd}t|t|D ]N\}}|| kr8|  S || kr |dkr^d| |  }t|q ||d    S q |d S )z
    Given a compute capability, return the closest compute capability supported
    by the CUDA toolkit.

    :param mycc: Compute capability as a tuple ``(MAJOR, MINOR)``
    :return: Closest supported CC as a tuple ``(MAJOR, MINOR)``
    zmNo supported GPU compute capabilities found. Please check your cudatoolkit version matches your CUDA version.r   z?GPU compute capability %d.%d is not supported(requires >=%d.%d)r
   )r   rG   r   	enumerate)ZmyccrG   rS   ir   r   r   r   find_closest_arch  s    
r   c                 C   s"   t jrt j}nt| |f}d| S )z1Matches with the closest architecture option
    zcompute_%d%d)r   ZFORCE_CUDA_CCr   )rJ   rK   rf   r   r   r   get_arch_option  s    r   z~Missing libdevice file.
Please ensure you have package cudatoolkit >= 11.0
Install package by:

    conda install cudatoolkit
c                   @   s    e Zd ZdZdd Zdd ZdS )	LibDeviceNc                 C   s0   | j d kr$t d krttt | _ | j | _d S rF   )_cache_r   RuntimeErrorMISSING_LIBDEVICE_FILE_MSGr   bcrD   r   r   r   r@     s
    

zLibDevice.__init__c                 C   s   | j S rF   )r   rD   r   r   r   ru     s    zLibDevice.get)rU   rV   rW   r   r@   ru   r   r   r   r   r     s   r   z
    %cas_success = cmpxchg volatile {Ti}* %iptr, {Ti} %old, {Ti} %new monotonic monotonic
    %cas = extractvalue {{ {Ti}, i1 }} %cas_success, 0
a  
define internal {T} @___numba_atomic_{T}_{FUNC}({T}* %ptr, {T} %val) alwaysinline {{
entry:
    %iptr = bitcast {T}* %ptr to {Ti}*
    %old2 = load volatile {Ti}, {Ti}* %iptr
    br label %attempt

attempt:
    %old = phi {Ti} [ %old2, %entry ], [ %cas, %attempt ]
    %dold = bitcast {Ti} %old to {T}
    %dnew = {OP} {T} %dold, %val
    %new = bitcast {T} %dnew to {Ti}
    {CAS}
    %repeat = icmp ne {Ti} %cas, %old
    br i1 %repeat, label %attempt, label %done

done:
    %result = bitcast {Ti} %old to {T}
    ret {T} %result
}}
a  
define internal {T} @___numba_atomic_{Tu}_inc({T}* %iptr, {T} %val) alwaysinline {{
entry:
    %old2 = load volatile {T}, {T}* %iptr
    br label %attempt

attempt:
    %old = phi {T} [ %old2, %entry ], [ %cas, %attempt ]
    %bndchk = icmp ult {T} %old, %val
    %inc = add {T} %old, 1
    %new = select i1 %bndchk, {T} %inc, {T} 0
    {CAS}
    %repeat = icmp ne {T} %cas, %old
    br i1 %repeat, label %attempt, label %done

done:
    ret {T} %old
}}
a  
define internal {T} @___numba_atomic_{Tu}_dec({T}* %iptr, {T} %val) alwaysinline {{
entry:
    %old2 = load volatile {T}, {T}* %iptr
    br label %attempt

attempt:
    %old = phi {T} [ %old2, %entry ], [ %cas, %attempt ]
    %dec = add {T} %old, -1
    %bndchk = icmp ult {T} %dec, %val
    %new = select i1 %bndchk, {T} %dec, {T} %val
    {CAS}
    %repeat = icmp ne {T} %cas, %old
    br i1 %repeat, label %attempt, label %done

done:
    ret {T} %old
}}
a  
define internal {T} @___numba_atomic_{T}_{NAN}{FUNC}({T}* %ptr, {T} %val) alwaysinline {{
entry:
    %ptrval = load volatile {T}, {T}* %ptr
    ; Return early when:
    ; - For nanmin / nanmax when val is a NaN
    ; - For min / max when val or ptr is a NaN
    %early_return = fcmp uno {T} %val, %{PTR_OR_VAL}val
    br i1 %early_return, label %done, label %lt_check

lt_check:
    %dold = phi {T} [ %ptrval, %entry ], [ %dcas, %attempt ]
    ; Continue attempts if dold less or greater than val (depending on whether min or max)
    ; or if dold is NaN (for nanmin / nanmax)
    %cmp = fcmp {OP} {T} %dold, %val
    br i1 %cmp, label %attempt, label %done

attempt:
    ; Attempt to swap in the value
    %old = bitcast {T} %dold to {Ti}
    %iptr = bitcast {T}* %ptr to {Ti}*
    %new = bitcast {T} %val to {Ti}
    {CAS}
    %dcas = bitcast {Ti} %cas to {T}
    br label %lt_check

done:
    ret {T} %ptrval
}}
c                 C   s   t j| dS )NTi)cas_nvvmr}   r   r   r   r   ir_cas7  s    r   c                 C   s"   t | |||t|d}tjf |S )N)Tr   OPFUNCCAS)dictr   ir_numba_atomic_binary_templater}   )r   r   r   r   paramsr   r   r   ir_numba_atomic_binary;  s    r   c              	   C   s&   t | |||||t|d}tjf |S )N)r   r   NANr   
PTR_OR_VALr   r   )r   r   ir_numba_atomic_minmax_templater}   )r   r   r   r   r   r   r   r   r   r   ir_numba_atomic_minmax@  s
     r   c                 C   s   t j| |t| dS N)r   Tur   )ir_numba_atomic_inc_templater}   r   r   r   r   r   r   ir_numba_atomic_incG  s    r   c                 C   s   t j| |t| dS r   )ir_numba_atomic_dec_templater}   r   r   r   r   r   ir_numba_atomic_decK  s    r   c                 C   s0  dt dddddfdt dd	d
ddfdt ddd
ddfdtdddfdtdddfdtdd	dddddfdtdddddddfdtdd	dddddfdtdddddddfdtdd	dddddfdtdddddddfd tdd	dd!dddfd"tdddd!dddfd#g}|D ]\}}| ||} qt| } | S )$NzIdeclare double @"___numba_atomic_double_add"(double* %".1", double %".2")doubleZi64Zfaddadd)r   r   r   r   zEdeclare float @"___numba_atomic_float_sub"(float* %".1", float %".2")floati32ZfsubsubzIdeclare double @"___numba_atomic_double_sub"(double* %".1", double %".2")z=declare i64 @"___numba_atomic_u64_inc"(i64* %".1", i64 %".2")Zu64r   z=declare i64 @"___numba_atomic_u64_dec"(i64* %".1", i64 %".2")zEdeclare float @"___numba_atomic_float_max"(float* %".1", float %".2")r   znnan oltZptrmax)r   r   r   r   r   r   zIdeclare double @"___numba_atomic_double_max"(double* %".1", double %".2")zEdeclare float @"___numba_atomic_float_min"(float* %".1", float %".2")znnan ogtr   zIdeclare double @"___numba_atomic_double_min"(double* %".1", double %".2")zHdeclare float @"___numba_atomic_float_nanmax"(float* %".1", float %".2")nanZultzLdeclare double @"___numba_atomic_double_nanmax"(double* %".1", double %".2")zHdeclare float @"___numba_atomic_float_nanmin"(float* %".1", float %".2")ZugtzLdeclare double @"___numba_atomic_double_nanmin"(double* %".1", double %".2"))Zimmargr   )r   r   r   r   rx   llvm140_to_70_ir)llvmirZreplacementsdeclfnr   r   r   llvm_replaceO  s    


 
 
 
 
 
 
 
 &r   c                 K   sx   t | tr| g} |ddr0|ddddd t }t }| D ]}t|}||d q@|	|
  |jf |S )NZfastmathFT)rg   rj   ri   rh   rm   )
isinstancestrrt   updater\   r   r   rc   rn   rd   ru   r   )r   r   cuZ	libdevicemodr   r   r   llvm_to_ptx~  s     
r   z"^attributes #\d+ = \{ ([\w\s]+)\ }c                 C   sn   g }|   D ]V}|drXt|}|d }ddd |D }||d|}|| qd|S )z,
    Convert LLVM 14.0 IR for LLVM 7.0.
    zattributes #r
    c                 s   s   | ]}|d kr|V  qdS )Z
willreturnNr   )ro   ar   r   r   	<genexpr>  s      z#llvm140_to_70_ir.<locals>.<genexpr>
)	
splitlines
startswithre_attributes_defmatchgroupsplitry   rx   rs   )r	   buflinemattrsr   r   r   r     s    

r   c                 C   sZ   | j }t|d}ttdd}|| ||f}t|d}|| | j	
d d S )NZkernel    r
   znvvm.annotationsZnoinline)moduler	   ZMetaDataStringConstantIntTypeadd_metadatar   Zget_or_insert_named_metadatar   
attributesdiscard)Zlfuncr   ZmdstrZmdvaluemdZnmdr   r   r   set_cuda_kernel  s    
r   c                    s<   t d  fddt  D }| |}| d| dS )zAdd NVVM IR version to moduler   c                    s   g | ]} |qS r   r   )ro   r   r   r   r   rq     s     z"add_ir_version.<locals>.<listcomp>znvvmir.versionN)r	   r   r   r9   r   Zadd_named_metadata)r   r?   Zmd_verr   r   r   add_ir_version  s    

r   )MrX   loggingrerP   r   ctypesr   r   r   r   r   r   r   	threadingZllvmliter	   rR   r   r   r   Zlibsr   r   r   Z
numba.corer   r   	getLoggerrU   loggerZADDRSPACE_GENERICZADDRSPACE_GLOBALZADDRSPACE_SHAREDZADDRSPACE_CONSTANTZADDRSPACE_LOCALrZ   rY   r   rN   r   r   r   r0   modulesrB   rC   r   Lockr&   r(   r   r\   r   r   r   r<   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s|   $
~ 	"
 /
