r"""
This package adds support for CUDA tensor types, that implement the same
function as CPU tensors, but they utilize GPUs for computation.

It is lazily initialized, so you can always import it, and use
:func:`is_available()` to determine if your system supports CUDA.

:ref:`cuda-semantics` has more details about working with CUDA.
"""

import contextlib
import importlib
import os
import sys
import threading
import traceback
import warnings
from functools import lru_cache
from typing import Any, cast, List, Optional, Tuple, Union

import torch
import torch._C
from torch.types import Device
from .. import device as _device
from .._utils import classproperty
from ._utils import _dummy_type, _get_device_index
from .graphs import (
    CUDAGraph,
    graph,
    graph_pool_handle,
    is_current_stream_capturing,
    make_graphed_callables,
)
from .streams import Event, ExternalStream, Stream

try:
    from torch._C import _cudart
except ImportError:
    _cudart = None

_initialized = False
_tls = threading.local()
_initialization_lock = threading.Lock()
_queued_calls = []  # don't invoke these until initialization occurs
_is_in_bad_fork = getattr(torch._C, "_cuda_isInBadFork", lambda: False)
_device_t = Union[_device, str, int, None]

_HAS_PYNVML = False
_PYNVML_ERR = None
try:
    import pynvml

    _HAS_PYNVML = True
except ImportError as err:
    _PYNVML_ERR = err  # sometimes the import fails even when the package is installed


class _LazySeedTracker:
    # Track the most recent manual_seed / manual_seed_all callbacks and the order
    # they arrived in, so only the latest of each is replayed at initialization.
    def __init__(self):
        self.manual_seed_all_cb = None
        self.manual_seed_cb = None
        self.call_order = []

    def queue_seed_all(self, cb, traceback):
        self.manual_seed_all_cb = (cb, traceback)
        # update seed_all to be latest
        self.call_order = [self.manual_seed_cb, self.manual_seed_all_cb]

    def queue_seed(self, cb, traceback):
        self.manual_seed_cb = (cb, traceback)
        # update seed to be latest
        self.call_order = [self.manual_seed_all_cb, self.manual_seed_cb]

    def get_calls(self) -> List:
        return self.call_order


_lazy_seed_tracker = _LazySeedTracker()

if hasattr(torch._C, "_CudaDeviceProperties"):
    _CudaDeviceProperties = torch._C._CudaDeviceProperties
else:
    _CudaDeviceProperties = _dummy_type("_CudaDeviceProperties")

if hasattr(torch._C, "_cuda_exchangeDevice"):
    _exchange_device = torch._C._cuda_exchangeDevice
else:

    def _exchange_device(device: int) -> int:
        if device < 0:
            return -1
        raise RuntimeError("PyTorch was compiled without CUDA support")


if hasattr(torch._C, "_cuda_maybeExchangeDevice"):
    _maybe_exchange_device = torch._C._cuda_maybeExchangeDevice
else:

    def _maybe_exchange_device(device: int) -> int:
        if device < 0:
            return -1
        raise RuntimeError("PyTorch was compiled without CUDA support")


has_magma: bool = False
has_half: bool = False
default_generators: Tuple[torch._C.Generator] = ()


def _is_compiled() -> bool:
    r"""Returns true if compiled with CUDA support."""
    return hasattr(torch._C, "_cuda_getDeviceCount")


def _nvml_based_avail() -> bool:
    return os.getenv("PYTORCH_NVML_BASED_CUDA_CHECK") == "1"


def is_available() -> bool:
    r"""Returns a bool indicating if CUDA is currently available."""
    if not _is_compiled():
        return False
    if _nvml_based_avail():
        # NVML-based check: fork-safe, but a weaker availability assessment than
        # asking the CUDA runtime directly.
        return device_count() > 0
    else:
        # Default availability inspection; returns 0 if the driver is missing or
        # cannot be initialized.
        return torch._C._cuda_getDeviceCount() > 0


def is_bf16_supported():
    r"""Returns a bool indicating if the current CUDA/ROCm device supports dtype bfloat16."""
    # ROCm devices are treated as supporting bfloat16.
    if torch.version.hip:
        return True
    cu_vers = torch.version.cuda
    if cu_vers is not None:
        cuda_maj_decide = int(cu_vers.split(".")[0]) >= 11
    else:
        cuda_maj_decide = False
    return (
        torch.cuda.get_device_properties(torch.cuda.current_device()).major >= 8
        and cuda_maj_decide
    )


def _sleep(cycles):
    torch._C._cuda_sleep(cycles)
def _check_capability():
    incorrect_binary_warn = """
    Found GPU%d %s which requires CUDA_VERSION >= %d to
     work properly, but your PyTorch was compiled
     with CUDA_VERSION %d. Please install the correct PyTorch binary
     using instructions from https://pytorch.org
    """

    old_gpu_warn = """
    Found GPU%d %s which is of cuda capability %d.%d.
    PyTorch no longer supports this GPU because it is too old.
    The minimum cuda capability supported by this library is %d.%d.
    """

    if torch.version.cuda is not None:  # on ROCm we don't want this check
        CUDA_VERSION = torch._C._cuda_getCompiledVersion()
        for d in range(device_count()):
            capability = get_device_capability(d)
            major = capability[0]
            minor = capability[1]
            name = get_device_name(d)
            current_arch = major * 10 + minor
            min_arch = min(
                (int(arch.split("_")[1]) for arch in torch.cuda.get_arch_list()),
                default=35,
            )
            if current_arch < min_arch:
                warnings.warn(
                    old_gpu_warn
                    % (d, name, major, minor, min_arch // 10, min_arch % 10)
                )


def _check_cubins():
    incompatible_device_warn = """
{} with CUDA capability sm_{} is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities {}.
If you want to use the {} GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
"""
    if torch.version.cuda is None:  # on ROCm we don't want this check
        return
    arch_list = get_arch_list()
    if len(arch_list) == 0:
        return
    supported_sm = [int(arch.split("_")[1]) for arch in arch_list if "sm_" in arch]
    for idx in range(device_count()):
        cap_major, cap_minor = get_device_capability(idx)
        # NVIDIA GPU compute architectures are backward compatible within major version
        supported = any(sm // 10 == cap_major for sm in supported_sm)
        if not supported:
            device_name = get_device_name(idx)
            capability = cap_major * 10 + cap_minor
            warnings.warn(
                incompatible_device_warn.format(
                    device_name, capability, " ".join(arch_list), device_name
                )
            )
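
# A small sketch (not from the original source) of how the warnings above relate to
# `get_arch_list()` and `get_device_capability()`: a device is usable when some
# compiled `sm_XY` architecture shares its major compute capability.
#
# >>> import torch
# >>> if torch.cuda.is_available():
# ...     major, minor = torch.cuda.get_device_capability(0)
# ...     compiled = torch.cuda.get_arch_list()  # e.g. ['sm_70', 'sm_80', ...]
# ...     ok = any(arch.startswith(f"sm_{major}") for arch in compiled)
# ...     print(f"capability {major}.{minor}, supported by this build: {ok}")
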
def is_initialized():
    r"""Returns whether PyTorch's CUDA state has been initialized."""
    return _initialized and not _is_in_bad_fork()


def _lazy_call(callable, **kwargs):
    if is_initialized():
        callable()
    else:
        global _lazy_seed_tracker
        if kwargs.get("seed_all", False):
            _lazy_seed_tracker.queue_seed_all(callable, traceback.format_stack())
        elif kwargs.get("seed", False):
            _lazy_seed_tracker.queue_seed(callable, traceback.format_stack())
        else:
            # Don't store the actual traceback to avoid memory cycles
            _queued_calls.append((callable, traceback.format_stack()))


_lazy_call(_check_capability)
_lazy_call(_check_cubins)


class DeferredCudaCallError(Exception):
    pass


OutOfMemoryError = torch._C._OutOfMemoryError


def init():
    r"""Initialize PyTorch's CUDA state.  You may need to call
    this explicitly if you are interacting with PyTorch via
    its C API, as Python bindings for CUDA functionality will not
    be available until this initialization takes place.  Ordinary users
    should not need this, as all of PyTorch's CUDA methods
    automatically initialize CUDA state on-demand.

    Does nothing if the CUDA state is already initialized.
    """
    _lazy_init()


def _lazy_init():
    global _initialized, _queued_calls
    if is_initialized() or hasattr(_tls, "is_initializing"):
        return
    with _initialization_lock:
        # Double-checked locking: the read above is an unsynchronized fast path,
        # this one is the critical section.
        if is_initialized():
            return
        # It is important to prevent other threads from entering _lazy_init
        # immediately while we still hold the GIL, because some of the C calls
        # below will release it.
        if _is_in_bad_fork():
            raise RuntimeError(
                "Cannot re-initialize CUDA in forked subprocess. To use CUDA with "
                "multiprocessing, you must use the 'spawn' start method"
            )
        if not hasattr(torch._C, "_cuda_getDeviceCount"):
            raise AssertionError("Torch not compiled with CUDA enabled")
        if _cudart is None:
            raise AssertionError(
                "libcudart functions unavailable. It looks like you have a broken build?"
            )
        # Load CUDA modules lazily to shorten startup time.
        if "CUDA_MODULE_LOADING" not in os.environ:
            os.environ["CUDA_MODULE_LOADING"] = "LAZY"
        torch._C._cuda_init()
        # Some queued calls may reentrantly call _lazy_init(); guard against that
        # with the is_initializing flag.
        _tls.is_initializing = True

        for calls in _lazy_seed_tracker.get_calls():
            if calls:
                _queued_calls.append(calls)

        try:
            for queued_call, orig_traceback in _queued_calls:
                try:
                    queued_call()
                except Exception as e:
                    msg = (
                        f"CUDA call failed lazily at initialization with error: {str(e)}\n\n"
                        f"CUDA call was originally invoked at:\n\n{''.join(orig_traceback)}"
                    )
                    raise DeferredCudaCallError(msg) from e
        finally:
            delattr(_tls, "is_initializing")
        _initialized = True


def cudart():
    _lazy_init()
    return _cudart


class cudaStatus:
    SUCCESS: int = 0
    ERROR_NOT_READY: int = 34


class CudaError(RuntimeError):
    def __init__(self, code: int) -> None:
        msg = _cudart.cudaGetErrorString(_cudart.cudaError(code))
        super().__init__(f"{msg} ({code})")


def check_error(res: int) -> None:
    if res != _cudart.cudaError.success:
        raise CudaError(res)
class _DeviceGuard:
    def __init__(self, index: int):
        self.idx = index
        self.prev_idx = -1

    def __enter__(self):
        self.prev_idx = torch.cuda._exchange_device(self.idx)

    def __exit__(self, type: Any, value: Any, traceback: Any):
        self.idx = torch.cuda._maybe_exchange_device(self.prev_idx)
        return False


class device:
    r"""Context-manager that changes the selected device.

    Args:
        device (torch.device or int): device index to select. It's a no-op if
            this argument is a negative integer or ``None``.
    """

    def __init__(self, device: Any):
        self.idx = _get_device_index(device, optional=True)
        self.prev_idx = -1

    def __enter__(self):
        self.prev_idx = torch.cuda._exchange_device(self.idx)

    def __exit__(self, type: Any, value: Any, traceback: Any):
        self.idx = torch.cuda._maybe_exchange_device(self.prev_idx)
        return False


class device_of(device):
    r"""Context-manager that changes the current device to that of given object.

    You can use both tensors and storages as arguments. If a given object is
    not allocated on a GPU, this is a no-op.

    Args:
        obj (Tensor or Storage): object allocated on the selected device.
    """

    def __init__(self, obj):
        idx = obj.get_device() if obj.is_cuda else -1
        super().__init__(idx)


def set_device(device: _device_t) -> None:
    r"""Sets the current device.

    Usage of this function is discouraged in favor of :any:`device`. In most
    cases it's better to use ``CUDA_VISIBLE_DEVICES`` environmental variable.

    Args:
        device (torch.device or int): selected device. This function is a no-op
            if this argument is negative.
    """
    device = _get_device_index(device)
    if device >= 0:
        torch._C._cuda_setDevice(device)


def get_device_name(device: Optional[_device_t] = None) -> str:
    r"""Gets the name of a device.

    Args:
        device (torch.device or int, optional): device for which to return the
            name. This function is a no-op if this argument is a negative
            integer. It uses the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Returns:
        str: the name of the device
    """
    return get_device_properties(device).name


def get_device_capability(device: Optional[_device_t] = None) -> Tuple[int, int]:
    r"""Gets the cuda capability of a device.

    Args:
        device (torch.device or int, optional): device for which to return the
            device capability. This function is a no-op if this argument is
            a negative integer. It uses the current device, given by
            :func:`~torch.cuda.current_device`, if :attr:`device` is ``None``
            (default).

    Returns:
        tuple(int, int): the major and minor cuda capability of the device
    """
    prop = get_device_properties(device)
    return prop.major, prop.minor


def get_device_properties(device: _device_t) -> _CudaDeviceProperties:
    r"""Gets the properties of a device.

    Args:
        device (torch.device or int or str): device for which to return the
            properties of the device.

    Returns:
        _CudaDeviceProperties: the properties of the device
    """
    _lazy_init()  # will define _get_device_properties
    device = _get_device_index(device, optional=True)
    if device < 0 or device >= device_count():
        raise AssertionError("Invalid device id")
    return _get_device_properties(device)


def can_device_access_peer(device: _device_t, peer_device: _device_t) -> bool:
    r"""Checks if peer access between two devices is possible."""
    _lazy_init()
    device = _get_device_index(device, optional=True)
    peer_device = _get_device_index(peer_device)
    if device < 0 or device >= device_count():
        raise AssertionError("Invalid device id")
    if peer_device < 0 or peer_device >= device_count():
        raise AssertionError("Invalid peer device id")
    return torch._C._cuda_canDeviceAccessPeer(device, peer_device)


class StreamContext:
    r"""Context-manager that selects a given stream.

    All CUDA kernels queued within its context will be enqueued on a selected
    stream.

    Args:
        Stream (Stream): selected stream. This manager is a no-op if it's
            ``None``.
    .. note:: Streams are per-device.
    """
    cur_stream: Optional["torch.cuda.Stream"]

    def __init__(self, stream: Optional["torch.cuda.Stream"]):
        self.stream = stream
        self.idx = _get_device_index(None, True)
        if not torch.jit.is_scripting():
            if self.idx is None:
                self.idx = -1

        self.src_prev_stream = (
            None if not torch.jit.is_scripting() else torch.cuda.default_stream(None)
        )
        self.dst_prev_stream = (
            None if not torch.jit.is_scripting() else torch.cuda.default_stream(None)
        )

    def __enter__(self):
        # Local cur_stream variable for type refinement
        cur_stream = self.stream
        # Return if stream is None or no CUDA device is available
        if cur_stream is None or self.idx == -1:
            return
        self.src_prev_stream = torch.cuda.current_stream(None)

        # If the stream is not on the current device, also remember the previous
        # stream on the stream's device before switching.
        if self.src_prev_stream.device != cur_stream.device:
            with device(cur_stream.device):
                self.dst_prev_stream = torch.cuda.current_stream(cur_stream.device)
        torch.cuda.set_stream(cur_stream)

    def __exit__(self, type: Any, value: Any, traceback: Any):
        # Local cur_stream variable for type refinement
        cur_stream = self.stream
        # If stream is None or no CUDA device is available, do nothing
        if cur_stream is None or self.idx == -1:
            return

        # Reset the stream on the destination device and then on the original device
        if self.src_prev_stream.device != cur_stream.device:
            torch.cuda.set_stream(self.dst_prev_stream)
        torch.cuda.set_stream(self.src_prev_stream)


def stream(stream: Optional["torch.cuda.Stream"]) -> StreamContext:
    r"""Wrapper around the Context-manager StreamContext that

    Arguments:
        stream (Stream): selected stream. This manager is a no-op if it's
            ``None``.
    ..Note:: In eager mode stream is of type Stream class while in JIT it is
    an object of the custom class ``torch.classes.cuda.Stream``.
    )r   r   r   r   r   r     s    
    """
    return StreamContext(stream)


def set_stream(stream: Stream):
    r"""Sets the current stream. This is a wrapper API to set the stream.
    Usage of this function is discouraged in favor of the ``stream``
    context manager.

    Args:
        stream (Stream): selected stream. This function is a no-op
            if this argument is ``None``.
    """
    if stream is None:
        return
    torch._C._cuda_setStream(
        stream_id=stream.stream_id,
        device_index=stream.device_index,
        device_type=stream.device_type,
    )


def _parse_visible_devices() -> Union[List[int], List[str]]:
    r"""Parse CUDA_VISIBLE_DEVICES environment variable."""
    var = os.getenv("CUDA_VISIBLE_DEVICES")
    if var is None:
        return list(range(64))

    def _strtoul(s: str) -> int:
        """Return -1 or positive integer sequence string starts with."""
        if not s:
            return -1
        for idx, c in enumerate(s):
            if not (c.isdigit() or (idx == 0 and c in "+-")):
                break
            if idx + 1 == len(s):
                idx += 1
        return int(s[:idx]) if idx > 0 else -1

    def parse_list_with_prefix(lst: str, prefix: str) -> List[str]:
        rcs: List[str] = []
        for elem in lst.split(","):
            # Repeated id results in empty set
            if elem in rcs:
                return cast(List[str], [])
            # Anything other than a prefixed id is ignored
            if not elem.startswith(prefix):
                break
            rcs.append(elem)
        return rcs

    if var.startswith("GPU-"):
        return parse_list_with_prefix(var, "GPU-")
    if var.startswith("MIG-"):
        return parse_list_with_prefix(var, "MIG-")
    rc: List[int] = []
    for elem in var.split(","):
        x = _strtoul(elem.strip())
        # Repeated ordinal results in empty set
        if x in rc:
            return cast(List[int], [])
        # Negative value aborts the sequence
        if x < 0:
            break
        rc.append(x)
    return rc


def _raw_device_count_nvml() -> int:
    r"""Return number of devices as reported by NVML
    or negative value if NVML discovery/initialization failed."""
    from ctypes import byref, c_int, CDLL

    nvml_h = CDLL("libnvidia-ml.so.1")
    rc = nvml_h.nvmlInit()
    if rc != 0:
        warnings.warn("Can't initialize NVML")
        return -1
    dev_count = c_int(-1)
    rc = nvml_h.nvmlDeviceGetCount_v2(byref(dev_count))
    if rc != 0:
        warnings.warn("Can't get nvml device count")
        return -1
    del nvml_h
    return dev_count.value


def _raw_device_uuid_nvml() -> Optional[List[str]]:
    r"""Return list of device UUIDs as reported by NVML
    or None if NVML discovery/initialization failed."""
    from ctypes import byref, c_int, c_void_p, CDLL, create_string_buffer

    nvml_h = CDLL("libnvidia-ml.so.1")
    rc = nvml_h.nvmlInit()
    if rc != 0:
        warnings.warn("Can't initialize NVML")
        return None
    dev_count = c_int(-1)
    rc = nvml_h.nvmlDeviceGetCount_v2(byref(dev_count))
    if rc != 0:
        warnings.warn("Can't get nvml device count")
        return None
    uuids: List[str] = []
    for idx in range(dev_count.value):
        dev_id = c_void_p()
        rc = nvml_h.nvmlDeviceGetHandleByIndex_v2(idx, byref(dev_id))
        if rc != 0:
            warnings.warn("Can't get device handle")
            return None
        buf_len = 96
        buf = create_string_buffer(buf_len)
        rc = nvml_h.nvmlDeviceGetUUID(dev_id, buf, buf_len)
        if rc != 0:
            warnings.warn("Can't get device UUID")
            return None
        uuids.append(buf.raw.decode("ascii").strip("\0"))
    del nvml_h
    return uuids


def _transform_uuid_to_ordinals(candidates: List[str], uuids: List[str]) -> List[int]:
    r"""Given the set of partial uuids and list of known uuids builds
    a set of ordinals excluding ambiguous partial IDs."""

    def uuid_to_ordinal(candidate: str, uuids: List[str]) -> int:
        best_match = -1
        for idx, uuid in enumerate(uuids):
            if not uuid.startswith(candidate):
                continue
            # Ambiguous candidate
            if best_match != -1:
                return -1
            best_match = idx
        return best_match

    rc: List[int] = []
    for candidate in candidates:
        idx = uuid_to_ordinal(candidate, uuids)
        # First invalid ordinal stops parsing
        if idx < 0:
            break
        # Duplicates result in empty set
        if idx in rc:
            return cast(List[int], [])
        rc.append(idx)
    return rc


def _device_count_nvml() -> int:
    r"""Return number of devices as reported by NVML taking CUDA_VISIBLE_DEVICES into account.
    Negative value is returned if NVML discovery or initialization has failed."""
    visible_devices = _parse_visible_devices()
    if not visible_devices:
        return 0
    try:
        if type(visible_devices[0]) is str:
            # Skip MIG parsing
            if visible_devices[0].startswith("MIG-"):
                return -1
            uuids = _raw_device_uuid_nvml()
            if uuids is None:
                return -1
            visible_devices = _transform_uuid_to_ordinals(
                cast(List[str], visible_devices), uuids
            )
        else:
            raw_cnt = _raw_device_count_nvml()
            if raw_cnt <= 0:
                return raw_cnt
            # Trim the list up to a maximum available device
            for idx, val in enumerate(visible_devices):
                if cast(int, val) >= raw_cnt:
                    return idx
    except OSError:
        return -1
    except AttributeError:
        return -1
    return len(visible_devices)


def _get_nvml_device_index(device: Optional[Union[int, Device]]) -> int:
    r"""Returns the NVML index of the device, taking CUDA_VISIBLE_DEVICES into account."""
    idx = _get_device_index(device, optional=True)
    visible_devices = _parse_visible_devices()
    if type(visible_devices[0]) is str:
        uuids = _raw_device_uuid_nvml()
        if uuids is None:
            raise RuntimeError("Can't get device UUIDs")
        visible_devices = _transform_uuid_to_ordinals(
            cast(List[str], visible_devices), uuids
        )
    idx_map = dict(enumerate(cast(List[int], visible_devices)))
    if idx not in idx_map:
        raise RuntimeError(
            f"device {idx} is not visible (CUDA_VISIBLE_DEVICES={visible_devices})"
        )
    return idx_map[idx]


@lru_cache(maxsize=1)
def device_count() -> int:
    r"""Returns the number of GPUs available."""
    if not _is_compiled():
        return 0
    nvml_count = _device_count_nvml()
    return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count


def get_arch_list() -> List[str]:
    r"""Returns the list of CUDA architectures this library was compiled for."""
    if not is_available():
        return []
    arch_flags = torch._C._cuda_getArchFlags()
    if arch_flags is None:
        return []
    return arch_flags.split()


def get_gencode_flags() -> str:
    r"""Returns NVCC gencode flags this library was compiled with."""
    arch_list = get_arch_list()
    if len(arch_list) == 0:
        return ""
    arch_list_ = [arch.split("_") for arch in arch_list]
    return " ".join(
        [f"-gencode compute=compute_{arch},code={kind}_{arch}" for (kind, arch) in arch_list_]
    )


def current_device() -> int:
    r"""Returns the index of a currently selected device."""
    _lazy_init()
    return torch._C._cuda_getDevice()
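
# Illustrative device-enumeration sketch (an assumed example, not original code):
# combine `device_count()`, `get_device_name()`, `get_device_properties()` and the
# `device` context manager to inspect and select GPUs.
#
# >>> import torch
# >>> for i in range(torch.cuda.device_count()):
# ...     props = torch.cuda.get_device_properties(i)
# ...     print(i, torch.cuda.get_device_name(i), props.total_memory // (1024 ** 2), "MiB")
# >>> if torch.cuda.device_count() > 1:
# ...     with torch.cuda.device(1):  # temporarily make GPU 1 current
# ...         assert torch.cuda.current_device() == 1
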
def synchronize(device: _device_t = None) -> None:
    r"""Waits for all kernels in all streams on a CUDA device to complete.

    Args:
        device (torch.device or int, optional): device for which to synchronize.
            It uses the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).
    """
    _lazy_init()
    with torch.cuda.device(device):
        return torch._C._cuda_synchronize()


def ipc_collect():
    r"""Force collects GPU memory after it has been released by CUDA IPC.

    .. note::
        Checks if any sent CUDA tensors could be cleaned from the memory. Force
        closes shared memory file used for reference counting if there is no
        active counters. Useful when the producer process stopped actively sending
        tensors and want to release unused memory.
    """
    _lazy_init()
    return torch._C._cuda_ipc_collect()


def current_stream(device: Optional[_device_t] = None) -> Stream:
    r"""Returns the currently selected :class:`Stream` for a given device.

    Args:
        device (torch.device or int, optional): selected device. Returns
            the currently selected :class:`Stream` for the current device, given
            by :func:`~torch.cuda.current_device`, if :attr:`device` is ``None``
            (default).
    """
    _lazy_init()
    streamdata = torch._C._cuda_getCurrentStream(
        _get_device_index(device, optional=True)
    )
    return Stream(
        stream_id=streamdata[0], device_index=streamdata[1], device_type=streamdata[2]
    )


def default_stream(device: Optional[_device_t] = None) -> Stream:
    r"""Returns the default :class:`Stream` for a given device.

    Args:
        device (torch.device or int, optional): selected device. Returns
            the default :class:`Stream` for the current device, given by
            :func:`~torch.cuda.current_device`, if :attr:`device` is ``None``
            (default).
    """
    _lazy_init()
    streamdata = torch._C._cuda_getDefaultStream(
        _get_device_index(device, optional=True)
    )
    return Stream(
        stream_id=streamdata[0], device_index=streamdata[1], device_type=streamdata[2]
    )


def current_blas_handle():
    r"""Returns cublasHandle_t pointer to current cuBLAS handle."""
    _lazy_init()
    return torch._C._cuda_getCurrentBlasHandle()


def set_sync_debug_mode(debug_mode: Union[int, str]) -> None:
    r"""Sets the debug mode for cuda synchronizing operations.

    Args:
        debug_mode(str or int): if "default" or 0, don't error or warn on synchronizing operations,
            if "warn" or 1, warn on synchronizing operations, if "error" or 2, error out synchronizing operations.

    Warning:
        This is an experimental feature, and not all synchronizing operations will trigger warning or error. In
        particular, operations in torch.distributed and torch.sparse namespaces are not covered yet.
    """
    _lazy_init()
    if isinstance(debug_mode, str):
        if debug_mode == "default":
            debug_mode = 0
        elif debug_mode == "warn":
            debug_mode = 1
        elif debug_mode == "error":
            debug_mode = 2
        else:
            raise RuntimeError(
                "invalid value of debug_mode, expected one of `default`, `warn`, `error`"
            )
    torch._C._cuda_set_sync_debug_mode(debug_mode)


def get_sync_debug_mode() -> int:
    r"""Returns current value of debug mode for cuda synchronizing operations."""
    _lazy_init()
    return torch._C._cuda_get_sync_debug_mode()


def _get_pynvml_handler(device: Optional[Union[Device, int]] = None):
    if not _HAS_PYNVML:
        raise ModuleNotFoundError(
            "pynvml does not seem to be installed or it can't be imported."
        ) from _PYNVML_ERR
    from pynvml import NVMLError_DriverNotLoaded

    try:
        pynvml.nvmlInit()
    except NVMLError_DriverNotLoaded as e:
        raise RuntimeError("cuda driver can't be loaded, is cuda enabled?") from e

    device = _get_nvml_device_index(device)
    handle = pynvml.nvmlDeviceGetHandleByIndex(device)
    return handle


def memory_usage(device: Optional[Union[Device, int]] = None) -> int:
    r"""Returns the percent of time over the past sample period during which global (device)
    memory was being read or written, as given by `nvidia-smi`.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    handle = _get_pynvml_handler()
    device = _get_nvml_device_index(device)
    handle = pynvml.nvmlDeviceGetHandleByIndex(device)
    return pynvml.nvmlDeviceGetUtilizationRates(handle).memory


def utilization(device: Optional[Union[Device, int]] = None) -> int:
    r"""Returns the percent of time over the past sample period during which one or
    more kernels was executing on the GPU as given by `nvidia-smi`.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    handle = _get_pynvml_handler(device)
    device = _get_nvml_device_index(device)
    handle = pynvml.nvmlDeviceGetHandleByIndex(device)
    return pynvml.nvmlDeviceGetUtilizationRates(handle).gpu


def temperature(device: Optional[Union[Device, int]] = None) -> int:
    r"""Returns the average temperature of the GPU sensor in Degrees C (Centigrades)
    over the past sample period as given by `nvidia-smi`.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    handle = _get_pynvml_handler(device)
    # 0 refers to the temperature sensor of the GPU die.
    return pynvml.nvmlDeviceGetTemperature(handle, 0)


def power_draw(device: Optional[Union[Device, int]] = None) -> int:
    r"""Returns the average power draw of the GPU sensor in mW (MilliWatts)
    over the past sample period as given by `nvidia-smi` for Fermi or newer fully supported devices.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    handle = _get_pynvml_handler(device)
    return pynvml.nvmlDeviceGetPowerUsage(handle)


def clock_rate(device: Optional[Union[Device, int]] = None) -> int:
    r"""Returns the clock speed of the GPU SM over the past sample period as given by `nvidia-smi`.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    Warning: Each sample period may be between 1 second and 1/6 second,
    depending on the product being queried.
    """
    handle = _get_pynvml_handler(device)
    return pynvml.nvmlDeviceGetClockInfo(handle, 1)


def _get_device(device: Union[int, str, torch.device]) -> torch.device:
    r"""Return the torch.device type object from the passed in device.

    Args:
        device (torch.device or int): selected device.
    """
    if isinstance(device, str):
        device = torch.device(device)
    elif isinstance(device, int):
        device = torch.device("cuda", device)
    return device


def _get_generator(device: torch.device) -> torch._C.Generator:
    r"""Return the CUDA Generator object for the given device.

    Args:
        device (torch.device): selected device.
    """
    idx = device.index
    if idx is None:
        idx = current_device()
    return torch.cuda.default_generators[idx]


def _set_rng_state_offset(
    offset: int, device: Union[int, str, torch.device] = "cuda"
) -> None:
    r"""Sets the random number generator state offset of the specified GPU.

    Args:
        offset (int): The desired offset
        device (torch.device or int, optional): The device to set the RNG state.
            Default: ``'cuda'`` (i.e., ``torch.device('cuda')``, the current CUDA device).
    """
    final_device = _get_device(device)

    def cb():
        default_generator = _get_generator(final_device)
        default_generator.set_offset(offset)

    _lazy_call(cb)


def _get_rng_state_offset(device: Union[int, str, torch.device] = "cuda") -> int:
    r"""Returns the random number generator state offset of the specified GPU.

    Args:
        device (torch.device or int, optional): The device to return the RNG state offset of.
            Default: ``'cuda'`` (i.e., ``torch.device('cuda')``, the current CUDA device).

    .. warning::
        This function eagerly initializes CUDA.
    """
    _lazy_init()
    final_device = _get_device(device)
    default_generator = _get_generator(final_device)
    return default_generator.get_offset()


from .memory import *
from .random import *


################################################################################
# Define Storage and Tensor classes
################################################################################


@staticmethod
def _lazy_new(cls, *args, **kwargs):
    _lazy_init()
    # We may need to call lazy init again if we are a forked child
    return super(_CudaBase, cls).__new__(cls, *args, **kwargs)


class _CudaBase:
    is_cuda = True
    is_sparse = False

    def type(self, *args, **kwargs):
        # Switch to the tensor's own device before dispatching the type conversion.
        with device(self.get_device()):
            return super().type(*args, **kwargs)

    __new__ = _lazy_new


from torch.storage import _LegacyStorage, _warn_typed_storage_removal


class _CudaLegacyStorage(_LegacyStorage):
    @classmethod
    def from_buffer(cls, *args, **kwargs):
        _warn_typed_storage_removal()
        raise RuntimeError("from_buffer: Not available for CUDA storage")

    @classmethod
    def _new_with_weak_ptr(cls, *args, **kwargs):
        raise RuntimeError("_new_with_weak_ptr: Not available for CUDA storage")

    @classmethod
    def _new_shared_filename(cls, manager, obj, size, *, device=None, dtype=None):
        raise RuntimeError("_new_shared_filename: Not available for CUDA storage")


class ByteStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.uint8


class DoubleStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.double


class FloatStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.float


class HalfStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.half


class LongStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.long


class IntStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.int


class ShortStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.short


class CharStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.int8


class BoolStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.bool


class BFloat16Storage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.bfloat16


class ComplexDoubleStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.cdouble


class ComplexFloatStorage(_CudaLegacyStorage):
    @classproperty
    def dtype(self):
        _warn_typed_storage_removal()
        return self._dtype

    @classproperty
    def _dtype(self):
        return torch.cfloat


del _LegacyStorage
del _CudaLegacyStorage

torch._storage_classes.add(DoubleStorage)
torch._storage_classes.add(FloatStorage)
torch._storage_classes.add(LongStorage)
torch._storage_classes.add(IntStorage)
torch._storage_classes.add(ShortStorage)
torch._storage_classes.add(CharStorage)
torch._storage_classes.add(ByteStorage)
torch._storage_classes.add(HalfStorage)
torch._storage_classes.add(BoolStorage)
torch._storage_classes.add(BFloat16Storage)
torch._storage_classes.add(ComplexDoubleStorage)
torch._storage_classes.add(ComplexFloatStorage)


class _WrappedTritonKernel:
    """Just a simple wrapper to store some metadata for testing purposes."""

    def __init__(self, kernel):
        self.kernel = kernel
        self.kernel_invoked = False

    def __call__(self, *args, **kwargs):
        res = self.kernel(*args, **kwargs)
        self.kernel_invoked = True
        return res


def _register_triton_kernels():
    if torch._running_with_deploy():
        return

    @_WrappedTritonKernel
    def kernel_impl(*args, **kwargs):
        from torch.sparse._triton_ops import bsr_dense_mm

        return bsr_dense_mm(*args, skip_checks=True, **kwargs)

    has_triton = importlib.util.find_spec("triton") is not None
    if has_triton:
        torch._TritonLibrary.registerOp(
            "_triton_bsr_dense_mm_out",
            "_triton_bsr_dense_mm_out(Tensor bsr, Tensor dense, *, Tensor(a!) out) -> Tensor(a!)",
            kernel_impl,
            "SparseCsrCUDA",
        )


_lazy_call(_register_triton_kernels)


from . import amp, jiterator, nvtx, profiler, sparse
__all__ = [
    "BFloat16Storage", "BFloat16Tensor", "BoolStorage", "BoolTensor", "ByteStorage", "ByteTensor",
    "CharStorage", "CharTensor", "ComplexDoubleStorage", "ComplexFloatStorage", "DoubleStorage",
    "DoubleTensor", "FloatStorage", "FloatTensor", "HalfStorage", "HalfTensor", "IntStorage",
    "IntTensor", "LongStorage", "LongTensor", "ShortStorage", "ShortTensor", "CUDAGraph",
    "CudaError", "DeferredCudaCallError", "Event", "ExternalStream", "OutOfMemoryError",
    "Stream", "StreamContext", "amp", "caching_allocator_alloc", "caching_allocator_delete",
    "can_device_access_peer", "check_error", "clock_rate", "cudaStatus", "cudart",
    "current_blas_handle", "current_device", "current_stream", "default_generators",
    "default_stream", "device", "device_count", "device_of", "empty_cache",
    "get_allocator_backend", "CUDAPluggableAllocator", "change_current_allocator",
    "get_arch_list", "get_device_capability", "get_device_name", "get_device_properties",
    "get_gencode_flags", "get_rng_state", "get_rng_state_all", "get_sync_debug_mode",
    "graph", "graph_pool_handle", "graphs", "has_half", "has_magma", "init", "initial_seed",
    "ipc_collect", "is_available", "is_bf16_supported", "is_current_stream_capturing",
    "is_initialized", "jiterator", "list_gpu_processes", "make_graphed_callables",
    "manual_seed", "manual_seed_all", "max_memory_allocated", "max_memory_cached",
    "max_memory_reserved", "mem_get_info", "memory", "memory_allocated", "memory_cached",
    "memory_reserved", "memory_snapshot", "memory_stats", "memory_stats_as_nested_dict",
    "memory_summary", "memory_usage", "nccl", "nvtx", "power_draw", "profiler", "random",
    "reset_accumulated_memory_stats", "reset_max_memory_allocated", "reset_max_memory_cached",
    "reset_peak_memory_stats", "seed", "seed_all", "set_device",
    "set_per_process_memory_fraction", "set_rng_state", "set_rng_state_all", "set_stream",
    "set_sync_debug_mode", "sparse", "stream", "streams", "synchronize", "temperature",
    "utilization",
]
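
# Final hedged example (not in the original module): the NVML-backed queries defined
# above require the optional `pynvml` package; a simple monitoring loop might look
# like this (the 0.5 s interval and 3 iterations are arbitrary choices).
#
# >>> import time, torch
# >>> if torch.cuda.is_available():
# ...     for _ in range(3):
# ...         print("util %d%%  mem busy %d%%  temp %dC"
# ...               % (torch.cuda.utilization(), torch.cuda.memory_usage(), torch.cuda.temperature()))
# ...         time.sleep(0.5)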