import collections
import contextlib
import warnings
from typing import Any, Dict, Union

import torch
from . import is_initialized, _get_device_index, _lazy_init
from torch.types import Device

__all__ = [
    "caching_allocator_alloc", "caching_allocator_delete",
    "set_per_process_memory_fraction", "empty_cache", "memory_stats",
    "memory_stats_as_nested_dict", "reset_accumulated_memory_stats",
    "reset_peak_memory_stats", "reset_max_memory_allocated",
    "reset_max_memory_cached", "memory_allocated", "max_memory_allocated",
    "memory_reserved", "max_memory_reserved", "memory_cached",
    "max_memory_cached", "memory_snapshot", "memory_summary",
    "list_gpu_processes", "mem_get_info",
]


def _host_allocator():
    _lazy_init()
    return torch._C._cuda_cudaHostAllocator()


@contextlib.contextmanager
def _free_mutex():
    torch._C._cuda_lock_mutex()
    try:
        yield
    finally:
        torch._C._cuda_unlock_mutex()


def caching_allocator_alloc(size, device: Union[Device, int] = None, stream=None):
    r"""Performs a memory allocation using the CUDA memory allocator.

    Memory is allocated for a given device and a stream; this
    function is intended to be used for interoperability with other
    frameworks. Allocated memory is released through
    :func:`~torch.cuda.caching_allocator_delete`.

    Args:
        size (int): number of bytes to be allocated.
        device (torch.device or int, optional): selected device. If it is
            ``None`` the default CUDA device is used.
        stream (torch.cuda.Stream or int, optional): selected stream. If it is ``None``, then
            the default stream for the selected device is used.

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    if device is None:
        device = torch.cuda.current_device()
    device = _get_device_index(device)
    if stream is None:
        stream = torch.cuda.current_stream(device)
    if isinstance(stream, torch.cuda.streams.Stream):
        stream = stream.cuda_stream
    if not isinstance(stream, int):
        raise TypeError('Invalid type for stream argument, must be '
                        '`torch.cuda.Stream` or `int` representing a pointer '
                        'to an existing stream')
    with torch.cuda.device(device):
        return torch._C._cuda_cudaCachingAllocator_raw_alloc(size, stream)


def caching_allocator_delete(mem_ptr):
    r"""Deletes memory allocated using the CUDA memory allocator.

    Memory allocated with :func:`~torch.cuda.caching_allocator_alloc`
    is freed here. The associated device and stream are tracked inside
    the allocator.

    Args:
        mem_ptr (int): memory address to be freed by the allocator.

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    torch._C._cuda_cudaCachingAllocator_raw_delete(mem_ptr)
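

# Illustrative usage sketch, not part of the original module: the helper name
# and the 1 KiB size are assumptions for demonstration. It shows the intended
# pairing of caching_allocator_alloc with caching_allocator_delete; the raw
# integer address must be freed explicitly, it is not garbage collected.
def _example_raw_allocation_roundtrip(nbytes: int = 1024) -> None:
    """Allocate ``nbytes`` of raw device memory, then release it."""
    ptr = caching_allocator_alloc(nbytes)  # uses the current device and stream
    try:
        pass  # hand ``ptr`` to an external framework here
    finally:
        caching_allocator_delete(ptr)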
    N)r   r    Z%_cuda_cudaCachingAllocator_raw_delete)Zmem_ptrr!   r!   r"   r   A   s    )r&   returnc                 C   sb   t   |dkrtj }t|}t| ts2td| dk sB| dkrPtd	| tj
| | dS )a  Set memory fraction for a process.
    The fraction is used to limit a caching allocator's allocated memory on a CUDA device.
    The allowed value equals the total visible memory multiplied by the fraction.
    Trying to allocate more than the allowed value in a process raises an out-of-memory
    error in the allocator.

    Args:
        fraction (float): Range: 0~1. Allowed memory equals total_memory * fraction.
        device (torch.device or int, optional): selected device. If it is
            ``None`` the default CUDA device is used.
    .. note::
        In general, the total available free memory is less than the total capacity.
    """
    _lazy_init()
    if device is None:
        device = torch.cuda.current_device()
    device = _get_device_index(device)
    if not isinstance(fraction, float):
        raise TypeError('Invalid type for fraction argument, must be `float`')
    if fraction < 0 or fraction > 1:
        raise ValueError('Invalid fraction value: {}. '
                         'Allowed range: 0~1'.format(fraction))
    torch._C._cuda_setMemoryFraction(fraction, device)
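
# Illustrative usage sketch, not part of the original module: caps this
# process at a fraction of device 0's total memory and returns the cap in
# bytes. ``torch.cuda.get_device_properties`` is the public API used to read
# the capacity; the helper name and the 0.5 default are assumptions.
def _example_cap_process_memory(fraction: float = 0.5) -> int:
    """Limit this process to ``fraction`` of device 0 and return the cap."""
    set_per_process_memory_fraction(fraction, device=0)
    total = torch.cuda.get_device_properties(0).total_memory
    return int(total * fraction)  # allocating beyond this raises OOM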


def empty_cache() -> None:
    r"""Releases all unoccupied cached memory currently held by the caching
    allocator so that they can be used by other GPU applications and become visible in
    `nvidia-smi`.

    .. note::
        :func:`~torch.cuda.empty_cache` doesn't increase the amount of GPU
        memory available for PyTorch. However, it may help reduce fragmentation
        of GPU memory in certain cases. See :ref:`cuda-memory-management` for
        more details about GPU memory management.
    N)r   r   r    Z_cuda_emptyCacher!   r!   r!   r"   r   m   s    c                    s8   g  fdd t | d} d|   tS )aX  Returns a dictionary of CUDA memory allocator statistics for a
    given device.

    The return value of this function is a dictionary of statistics, each of
    which is a non-negative integer.

    Core statistics:

    - ``"allocated.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      number of allocation requests received by the memory allocator.
    - ``"allocated_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      amount of allocated memory.
    - ``"segment.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      number of reserved segments from ``cudaMalloc()``.
    - ``"reserved_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      amount of reserved memory.
    - ``"active.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      number of active memory blocks.
    - ``"active_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      amount of active memory.
    - ``"inactive_split.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      number of inactive, non-releasable memory blocks.
    - ``"inactive_split_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      amount of inactive, non-releasable memory.

    For these core statistics, values are broken down as follows.

    Pool type:

    - ``all``: combined statistics across all memory pools.
    - ``large_pool``: statistics for the large allocation pool
      (as of October 2019, for size >= 1MB allocations).
    - ``small_pool``: statistics for the small allocation pool
      (as of October 2019, for size < 1MB allocations).

    Metric type:

    - ``current``: current value of this metric.
    - ``peak``: maximum value of this metric.
    - ``allocated``: historical total increase in this metric.
    - ``freed``: historical total decrease in this metric.

    In addition to the core statistics, we also provide some simple event
    counters:

    - ``"num_alloc_retries"``: number of failed ``cudaMalloc`` calls that
      result in a cache flush and retry.
    - ``"num_ooms"``: number of out-of-memory errors thrown.

    The caching allocator can be configured via ENV to not split blocks larger than a
    defined size (see the Memory Management section of the CUDA Semantics documentation).
    This helps avoid memory fragmentation but may have a performance
    penalty. Additional outputs to assist with tuning and evaluating impact:

    - ``"max_split_size"``: blocks above this size will not be split.
    - ``"oversize_allocations.{current,peak,allocated,freed}"``:
      number of over-size allocation requests received by the memory allocator.
    - ``"oversize_segments.{current,peak,allocated,freed}"``:
      number of over-size reserved segments from ``cudaMalloc()``.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistics for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    result = []

    def _recurse_add_to_result(prefix, obj):
        if isinstance(obj, dict):
            if len(prefix) > 0:
                prefix += "."
            for k, v in obj.items():
                _recurse_add_to_result(prefix + k, v)
        else:
            result.append((prefix, obj))

    stats = memory_stats_as_nested_dict(device=device)
    _recurse_add_to_result("", stats)
    result.sort()

    return collections.OrderedDict(result)
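

# Illustrative usage sketch, not part of the original module: because the
# returned dict is flat, the documented core statistics can be read directly
# by their dotted keys; the helper name is an assumption for demonstration.
def _example_read_core_counters(device: int = 0) -> None:
    """Print current and peak allocated bytes from the flat stats dict."""
    stats = memory_stats(device=device)
    for key in ("allocated_bytes.all.current", "allocated_bytes.all.peak"):
        print("{}: {}".format(key, stats.get(key, 0)))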


def memory_stats_as_nested_dict(device: Union[Device, int] = None) -> Dict[str, Any]:
    r"""Returns the result of :func:`~torch.cuda.memory_stats` as a nested dictionary."""
    if not is_initialized():
        return {}
    device = _get_device_index(device, optional=True)
    return torch._C._cuda_memoryStats(device)


def reset_accumulated_memory_stats(device: Union[Device, int] = None) -> None:
    r"""Resets the "accumulated" (historical) stats tracked by the CUDA memory allocator.

    See :func:`~torch.cuda.memory_stats` for details. Accumulated stats correspond to
    the `"allocated"` and `"freed"` keys in each individual stat dict, as well as
    `"num_alloc_retries"` and `"num_ooms"`.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    device = _get_device_index(device, optional=True)
    return torch._C._cuda_resetAccumulatedMemoryStats(device)


def reset_peak_memory_stats(device: Union[Device, int] = None) -> None:
    r"""Resets the "peak" stats tracked by the CUDA memory allocator.

    See :func:`~torch.cuda.memory_stats` for details. Peak stats correspond to the
    `"peak"` key in each individual stat dict.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    device = _get_device_index(device, optional=True)
    return torch._C._cuda_resetPeakMemoryStats(device)


def reset_max_memory_allocated(device: Union[Device, int] = None) -> None:
    r"""Resets the starting point in tracking maximum GPU memory occupied by
    tensors for a given device.

    See :func:`~torch.cuda.max_memory_allocated` for details.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. warning::
        This function now calls :func:`~torch.cuda.reset_peak_memory_stats`, which resets
        /all/ peak memory stats.

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    warnings.warn(
        "torch.cuda.reset_max_memory_allocated now calls torch.cuda.reset_peak_memory_stats, "
        "which resets /all/ peak memory stats.",
        FutureWarning)
    return reset_peak_memory_stats(device=device)


def reset_max_memory_cached(device: Union[Device, int] = None) -> None:
    r"""Resets the starting point in tracking maximum GPU memory managed by the
    caching allocator for a given device.

    See :func:`~torch.cuda.max_memory_cached` for details.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. warning::
        This function now calls :func:`~torch.cuda.reset_peak_memory_stats`, which resets
        /all/ peak memory stats.

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    warnings.warn(
        "torch.cuda.reset_max_memory_cached now calls torch.cuda.reset_peak_memory_stats, "
        "which resets /all/ peak memory stats.",
        FutureWarning)
    return reset_peak_memory_stats(device=device)


def memory_allocated(device: Union[Device, int] = None) -> int:
    r"""Returns the current GPU memory occupied by tensors in bytes for a given
    device.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        This is likely less than the amount shown in `nvidia-smi` since some
        unused memory can be held by the caching allocator and some context
        needs to be created on GPU. See :ref:`cuda-memory-management` for more
        details about GPU memory management.
    """
    return memory_stats(device=device).get("allocated_bytes.all.current", 0)
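

# Illustrative usage sketch, not part of the original module: allocating a
# tensor grows the counter by at least the tensor's storage size (the
# allocator may round block sizes up). The helper name is an assumption.
def _example_allocation_grows_counter(device: int = 0) -> None:
    """Check that a new tensor is reflected in memory_allocated()."""
    before = memory_allocated(device)
    x = torch.empty(1024, 1024, device="cuda:{}".format(device))
    assert memory_allocated(device) >= before + x.numel() * x.element_size()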


def max_memory_allocated(device: Union[Device, int] = None) -> int:
    r"""Returns the maximum GPU memory occupied by tensors in bytes for a given
    device.

    By default, this returns the peak allocated memory since the beginning of
    this program. :func:`~torch.cuda.reset_peak_memory_stats` can be used to
    reset the starting point in tracking this metric. For example, these two
    functions can measure the peak allocated memory usage of each iteration in a
    training loop.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    return memory_stats(device=device).get("allocated_bytes.all.peak", 0)
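

# Illustrative usage sketch, not part of the original module: the
# per-iteration peak-measurement pattern described in the docstring above.
# ``steps`` (an iterable of zero-argument callables) and the helper name are
# assumptions for demonstration.
def _example_peak_per_iteration(steps, device: int = 0) -> None:
    """Print the peak tensor memory consumed by each step() call."""
    for i, step in enumerate(steps):
        reset_peak_memory_stats(device)  # restart peak tracking for this step
        step()
        print("step {}: peak {} bytes".format(i, max_memory_allocated(device)))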


def memory_reserved(device: Union[Device, int] = None) -> int:
    r"""Returns the current GPU memory managed by the caching allocator in bytes
    for a given device.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    return memory_stats(device=device).get("reserved_bytes.all.current", 0)


def max_memory_reserved(device: Union[Device, int] = None) -> int:
    r"""Returns the maximum GPU memory managed by the caching allocator in bytes
    for a given device.

    By default, this returns the peak cached memory since the beginning of this
    program. :func:`~torch.cuda.reset_peak_memory_stats` can be used to reset
    the starting point in tracking this metric. For example, these two functions
    can measure the peak cached memory amount of each iteration in a training
    loop.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    return memory_stats(device=device).get("reserved_bytes.all.peak", 0)


def memory_cached(device: Union[Device, int] = None) -> int:
    r"""Deprecated; see :func:`~torch.cuda.memory_reserved`."""
    warnings.warn(
        "torch.cuda.memory_cached has been renamed to torch.cuda.memory_reserved",
        FutureWarning)
    return memory_reserved(device=device)


def max_memory_cached(device: Union[Device, int] = None) -> int:
    r"""Deprecated; see :func:`~torch.cuda.max_memory_reserved`."""
    warnings.warn(
        "torch.cuda.max_memory_cached has been renamed to torch.cuda.max_memory_reserved",
        FutureWarning)
    return max_memory_reserved(device=device)


def memory_snapshot():
    r"""Returns a snapshot of the CUDA memory allocator state across all devices.

    Interpreting the output of this function requires familiarity with the
    memory allocator internals.

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    return torch._C._cuda_memorySnapshot()


def memory_summary(device: Union[Device, int] = None, abbreviated: bool = False) -> str:
    r"""Returns a human-readable printout of the current memory allocator
    statistics for a given device.

    This can be useful to display periodically during training, or when
    handling out-of-memory exceptions.

    Args:
        device (torch.device or int, optional): selected device. Returns
            printout for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).
        abbreviated (bool, optional): whether to return an abbreviated summary
            (default: False).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    device = _get_device_index(device, optional=True)
    stats = memory_stats(device=device)

    def _format_size(sz, pref_sz):
        prefixes = ["B ", "KB", "MB", "GB", "TB", "PB"]
        prefix = prefixes[0]
        for new_prefix in prefixes[1:]:
            if pref_sz < 768 * 1024:
                break
            prefix = new_prefix
            sz //= 1024
            pref_sz /= 1024
        return "{:7d} {}".format(sz, prefix)

    def _format_count(cnt, pref_cnt):
        prefixes = [" ", "K", "M"]
        prefix = prefixes[0]
        for new_prefix in prefixes[1:]:
            if pref_cnt < 750 * 1000:
                break
            prefix = new_prefix
            cnt //= 1000
            pref_cnt /= 1000
        return "{:7d} {} ".format(cnt, prefix)

    metrics_to_display = [
        ("allocated_bytes", "Allocated memory", _format_size),
        ("active_bytes", "Active memory", _format_size),
        ("reserved_bytes", "GPU reserved memory", _format_size),
        ("inactive_split_bytes", "Non-releasable memory", _format_size),
        ("allocation", "Allocations", _format_count),
        ("active", "Active allocs", _format_count),
        ("segment", "GPU reserved segments", _format_count),
        ("inactive_split", "Non-releasable allocs", _format_count),
    ]

    lines = []
    lines.append("=" * 75)
    lines.append(" {_:16} PyTorch CUDA memory summary, device ID {device:<17d} ")
    lines.append("-" * 75)
    lines.append("  {_:9} CUDA OOMs: {num_ooms:<12d} | {_:6} cudaMalloc retries: {num_alloc_retries:<8d}  ")
    lines.append("=" * 75)
    lines.append("        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  ")

    for metric_key, metric_name, formatter in metrics_to_display:
        lines.append("-" * 75)
        submetrics = [("all", metric_name)]
        if not abbreviated:
            submetrics.append(("large_pool", "      from large pool"))
            submetrics.append(("small_pool", "      from small pool"))
        current_prefval = peak_prefval = allocated_prefval = freed_prefval = None
        for submetric_key, submetric_name in submetrics:
            prefix = metric_key + "." + submetric_key + "."
            current, peak, allocated, freed = (
                stats[prefix + n] for n in ("current", "peak", "allocated", "freed"))
            if current_prefval is None:
                current_prefval, peak_prefval = current, peak
                allocated_prefval, freed_prefval = allocated, freed
            lines.append(" {:<21} | {} | {} | {} | {} ".format(
                submetric_name,
                formatter(current, current_prefval),
                formatter(peak, peak_prefval),
                formatter(allocated, allocated_prefval),
                formatter(freed, freed_prefval)))

    metrics_to_display = [
        ("oversize_allocations", "Oversize allocations", _format_count),
        ("oversize_segments", "Oversize GPU segments", _format_count),
    ]

    for metric_key, metric_name, formatter in metrics_to_display:
        lines.append("-" * 75)
        prefix = metric_key + "."
        current, peak, allocated, freed = (
            stats[prefix + n] for n in ("current", "peak", "allocated", "freed"))
        lines.append(" {:<21} | {} | {} | {} | {} ".format(
            metric_name,
            formatter(current, current),
            formatter(peak, peak),
            formatter(allocated, allocated),
            formatter(freed, freed)))

    lines.append("=" * 75)

    fmt_dict = {"_": "", "device": device}
    for k, v in stats.items():
        fmt_dict[k.replace(".", "-")] = v
    return "|" + "|\n|".join(lines).format(**fmt_dict) + "|\n"


def list_gpu_processes(device: Union[Device, int] = None) -> str:
    r"""Returns a human-readable printout of the running processes
    and their GPU memory use for a given device.

    This can be useful to display periodically during training, or when
    handling out-of-memory exceptions.

    Args:
        device (torch.device or int, optional): selected device. Returns
            printout for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).
    """
    try:
        import pynvml  # type: ignore
    except ModuleNotFoundError:
        return "pynvml module not found, please install pynvml"
    from pynvml import NVMLError_DriverNotLoaded
    try:
        pynvml.nvmlInit()
    except NVMLError_DriverNotLoaded:
        return "cuda driver can't be loaded, is cuda enabled?"
    device = _get_device_index(device, optional=True)
    handle = pynvml.nvmlDeviceGetHandleByIndex(device)
    procs = pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
    lines = []
    lines.append("GPU:{}".format(device))
    if len(procs) == 0:
        lines.append("no processes are running")
    for p in procs:
        mem = p.usedGpuMemory / (1024 * 1024)
        lines.append("process {:>10d} uses {:>12.3f} MB GPU memory".format(p.pid, mem))
    return "\n".join(lines)


def mem_get_info(device: Union[Device, int] = None):
    r"""Returns the global free and total GPU memory for a given
    device using cudaMemGetInfo.

    Args:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more
        details about GPU memory management.
    N)r   r'   r(   r   ZcudartZcudaMemGetInfor%   r!   r!   r"   r   @  s    
)NN)N)N)N)N)N)N)N)N)N)N)N)N)N)NF)N)N))rA   
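

# Illustrative usage sketch, not part of the original module: a combined
# diagnostic dump built from the public helpers above, handy inside an
# out-of-memory handler. The helper name is an assumption; list_gpu_processes
# additionally requires pynvml to be installed.
def _example_oom_report(device: int = 0) -> str:
    """Collect free/total memory, the allocator summary, and process usage."""
    free, total = mem_get_info(device)
    return "\n".join([
        "free: {} bytes / total: {} bytes".format(free, total),
        memory_summary(device=device, abbreviated=True),
        list_gpu_processes(device),
    ])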