U
    ┼9%e1T  у                   @   s╝   d dl Z d dlZd dlmZmZ ddlmZ e	ej
dГshedГej
jd< edГej
jd< edГej
jd< d d	lmZmZmZ d
dД ZddД ZG ddД dej
jГZG ddД dГZdddДZdS )щ    N)┌tree_flatten┌tree_unflattenщ   )┌_dummy_typeZ_CudaStreamBase┌
_CUDAGraph┌_graph_pool_handle┌_cuda_isCurrentStreamCapturing)r   r   r   c                   C   s   t Г S )z╥
    Returns True if CUDA graph capture is underway on the current CUDA stream, False otherwise.

    If a CUDA context does not exist on the current device, returns False without initializing the context.
    )r   й r	   r	   ·P/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/torch/cuda/graphs.py┌is_current_stream_capturing   s    r   c                   C   s   t Г S )z▀
    Returns an opaque token representing the id of a graph memory pool.
    See :ref:`Graph memory management<graph-memory-management>`.

    .. warning::
        This API is in beta and may change in future releases.
    )r   r	   r	   r	   r
   ┌graph_pool_handle$   s    r   c                       sv   e Zd ZdZЗ fddДZdЗ fddД	ZЗ fdd	ДZЗ fd
dДZЗ fddДZЗ fddДZ	З fddДZ
З fddДZЗ  ZS )┌	CUDAGraphzw
    Wrapper around a CUDA graph.

    .. warning::
        This API is in beta and may change in future releases.
    c                    s   t Г а| бS йN)┌super┌__new__)┌clsй┌	__class__r	   r
   r   8   s    zCUDAGraph.__new__N┌globalc                    s   t Г j||dН dS )a·  
        Begins capturing CUDA work on the current stream.

        Typically, you shouldn't call ``capture_begin`` yourself.
        Use :class:`~torch.cuda.graph` or :func:`~torch.cuda.make_graphed_callables`,
        which call ``capture_begin`` internally.

        Arguments:
            pool (optional): Token (returned by :func:`~torch.cuda.graph_pool_handle` or
                :meth:`other_Graph_instance.pool()<torch.cuda.CUDAGraph.pool>`) that hints this graph may share memory
                with the indicated pool.  See :ref:`Graph memory management<graph-memory-management>`.
            capture_error_mode (str, optional): specifies the cudaStreamCaptureMode for the graph capture stream.
                Can be "global", "thread_local" or "relaxed". During cuda graph capture, some actions, such as cudaMalloc,
                may be unsafe. "global" will error on actions in other threads, "thread_local" will only error for
                actions in the current thread, and "relaxed" will not error on these actions. Do NOT change this setting
                unless you're familiar with `cudaStreamCaptureMode <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g9d0535d93a214cbf126835257b16ba85>`_
        )┌pool┌capture_error_modeN)r   ┌capture_begin)┌selfr   r   r   r	   r
   r   ;   s    zCUDAGraph.capture_beginc                    s   t Г аб  dS )aP  
        Ends CUDA graph capture on the current stream.
        After ``capture_end``, ``replay`` may be called on this instance.

        Typically, you shouldn't call ``capture_end`` yourself.
        Use :class:`~torch.cuda.graph` or :func:`~torch.cuda.make_graphed_callables`,
        which call ``capture_end`` internally.
        N)r   ┌capture_endйr   r   r	   r
   r   O   s    	zCUDAGraph.capture_endc                    s   t Г аб  dS )z?
        Replays the CUDA work captured by this graph.
        N)r   ┌replayr   r   r	   r
   r   Z   s    zCUDAGraph.replayc                    s   t Г аб  dS )zD
        Deletes the graph currently held by this instance.
        N)r   ┌resetr   r   r	   r
   r   `   s    zCUDAGraph.resetc                    s
   t Г аб S )zэ
        Returns an opaque token representing the id of this graph's memory pool.
        This id can optionally be passed to another graph's ``capture_begin``,
        which hints the other graph may share the same memory pool.
        )r   r   r   r   r	   r
   r   f   s    zCUDAGraph.poolc                    s
   t Г аб S )zB
        Enables debugging mode for CUDAGraph.debug_dump.
        )r   ┌enable_debug_moder   r   r	   r
   r   n   s    zCUDAGraph.enable_debug_modec                    s   t Г а|бS )z╓
        Arguments:
            debug_path (required): Path to dump the graph to.

        Calls a debugging function to dump the graph if the debugging is
        enabled via CUDAGraph.enable_debug_mode()
        )r   ┌
debug_dump)r   Z
debug_pathr   r	   r
   r   t   s    zCUDAGraph.debug_dump)Nr   )┌__name__┌
__module__┌__qualname__┌__doc__r   r   r   r   r   r   r   r   ┌__classcell__r	   r	   r   r
   r   0   s   r   c                   @   s4   e Zd ZdZdZdedЬddДZddД Zd	d
Д ZdS )┌grapha╬  
    Context-manager that captures CUDA work into a :class:`torch.cuda.CUDAGraph`
    object for later replay.

    See :ref:`CUDA Graphs <cuda-graph-semantics>` for a general introduction,
    detailed use, and constraints.

    Arguments:
        cuda_graph (torch.cuda.CUDAGraph): Graph object used for capture.
        pool (optional): Opaque token (returned by a call to :func:`~torch.cuda.graph_pool_handle()` or
            :meth:`other_Graph_instance.pool()<torch.cuda.CUDAGraph.pool>`) hinting this graph's capture
            may share memory from the specified pool. See :ref:`Graph memory management<graph-memory-management>`.
        stream (torch.cuda.Stream, optional): If supplied, will be set as the current stream in the context.
            If not supplied, ``graph`` sets its own internal side stream as the current stream in the context.
        capture_error_mode (str, optional): specifies the cudaStreamCaptureMode for the graph capture stream.
            Can be "global", "thread_local" or "relaxed". During cuda graph capture, some actions, such as cudaMalloc,
            may be unsafe. "global" will error on actions in other threads, "thread_local" will only error for
            actions in the current thread, and "relaxed" will not error on actions. Do NOT change this setting
            unless you're familiar with `cudaStreamCaptureMode <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g9d0535d93a214cbf126835257b16ba85>`_

    .. note::
        For effective memory sharing, if you pass a ``pool`` used by a previous capture and the previous capture
        used an explicit ``stream`` argument, you should pass the same ``stream`` argument to this capture.

    .. warning::
        This API is in beta and may change in future releases.

    .. _cudaStreamCaptureMode:
        https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g9d0535d93a214cbf126835257b16ba85
    Nr   )r   c                 C   sr   | j jd krtjаб | j _|d kr&dn|f| _|d k	r:|n| j j| _| jd k	sRtВtjа| jб| _	|| _
|| _d S )Nr	   )r   ┌default_capture_stream┌torch┌cuda┌Streamr   Zcapture_stream┌AssertionError┌stream┌
stream_ctx┌
cuda_graphr   )r   r,   r   r*   r   r	   r	   r
   ┌__init__а   s    
 zgraph.__init__c                 C   s@   t jаб  tаб  t jаб  | jаб  | jj	| j
d| jiО d S )Nr   )r&   r'   ┌synchronize┌gcZcollectZempty_cacher+   ┌	__enter__r,   r   r   r   r   r	   r	   r
   r0   ╢   s    


  zgraph.__enter__c                 C   s   | j аб  | jа|||б d S r   )r,   r   r+   ┌__exit__)r   ┌exc_type┌	exc_value┌	tracebackr	   r	   r
   r1   ─   s    
zgraph.__exit__)NNr   )	r   r    r!   r"   r%   ┌strr-   r0   r1   r	   r	   r	   r
   r$      s      √√r$   щ   Fc           %         s.  t аб rt аб rtdГВd}t| tГs6d}| f} |f}g Й t| |ГD ]Ъ\}}t|t jjГrкt	|j
ГdkrДt	|jГdkrДt	|jГdksМtdГВtddД |аб D ГГsкtdГВt|Г\}}И аt|Гб td	dД |D ГГsDtd
ГВqDddД И D Г}	ddД | D ГЙЗ ЗfddДtt	| ГГD Г}
ddД tt	| ГГD Г}ddД tt	| ГГD Г}tГ }t jаб  t jаt jаб бПД t| ||
ГD ]p\}}}t|ГD ]V}t||О Г\}}t jjtddД |D ГГtddД |D ГГtddД |D ГГd|dН}РqЖ~~РqtW 5 Q R X t jаб  g }g }t| ||ГD ]T\}}}t jj||dНП ||О }W 5 Q R X t|Г\}}|аt|Гб |а|б Рqg }g }tt|
Гt|Гt|ГtИГГD ]╓\}}}}tddД |D ГГ}t jj||dНПF t jjtddД |D ГГtddД |D ГГtddД |D ГГd|dН}W 5 Q R X g }d}|D ]0}|jРr4|а|| б |d7 }n
|аdб Рqt|Г}|а|б |а|б РqКtt|ГГ}tt|ГГ}ddД } g }!t| ГD ]И\}"}| ||" ||" И|" |	|" ||" |
|" ||" ||" ||" Г	}#t|t jjГРr
ddД }$|$||j|#|j Г|_ |!а|б n
|!а|#б РqО|Рr&|!d S t|!ГS ) a╕  
    Accepts callables (functions or :class:`nn.Module<torch.nn.Module>`\ s)
    and returns graphed versions.

    Each graphed callable's forward pass runs its source callable's
    forward CUDA work as a CUDA graph inside a single autograd node.

    The graphed callable's forward pass also appends
    a backward node to the autograd graph. During backward, this node runs the
    callable's backward work as a CUDA graph.

    Therefore, each graphed callable should be a drop-in replacement for its source callable
    in an autograd-enabled training loop.

    See :ref:`Partial-network capture<partial-network-capture>` for detailed use and constraints.

    If you pass a tuple of several callables, their captures will use the same memory pool.
    See :ref:`Graph memory management<graph-memory-management>` for when this is appropriate.

    Arguments:
        callables (torch.nn.Module or Python function, or tuple of these): Callable or callables to graph.
            See :ref:`Graph memory management<graph-memory-management>` for when passing a tuple of callables
            is appropriate.  If you pass a tuple of callables, their order in the tuple must be the same order
            they'll run in the live workload.
        sample_args (tuple of Tensors, or tuple of tuples of Tensors): Samples args for each callable.
            If a single callable was passed, ``sample_args`` must be a single tuple of argument Tensors.
            If a tuple of callables was passed, ``sample_args`` must be tuple of tuples of argument Tensors.
        num_warmup_iters (int): The number of warmup iterations. Currently, ``DataDistributedParallel`` needs
            11 iterations for warm up. Default: ``3``.
        allow_unused_input (bool): If False, specifying inputs that were not used when computing outputs
            (and therefore their grad is always zero) is an error. Defaults to False.

    .. note::
        The ``requires_grad`` state of each Tensor in ``sample_args`` must match the state
        that's expected for the corresponding real input in the training loop.

    .. warning::
        This API is in beta and may change in future releases.

    .. warning::
        ``sample_args`` for each callable must contain only Tensors. Other types are not allowed.

    .. warning::
        Returned callables do not support higher order differentiation (e.g., double backward).

    .. warning::
        In any :class:`~torch.nn.Module` passed to :func:`~make_graphed_callables`, only parameters
        may be trainable. Buffers must have ``requires_grad=False``.

    .. warning::
        After you pass a :class:`torch.nn.Module` through :func:`~make_graphed_callables`,
        you may not add or remove any of that Module's parameters or buffers.

    .. warning::
        :class:`torch.nn.Module`\s passed to :func:`~torch.cuda.make_graphed_callables` must not have module hooks
        registered on them at the time they are passed. However, registering hooks on modules *after* passing them
        through :func:`~torch.cuda.make_graphed_callables` is allowed.

    .. warning::
        When running a graphed callable, you must pass its arguments in the same order and format
        they appeared in that callable's ``sample_args``.

    .. warning::
        The automatic mixed precision is supported in :func:`~torch.cuda.make_graphed_callables` only with disabled
        caching. The context manager `torch.cuda.amp.autocast()` must have `cache_enabled=False`.
    z_make_graphed_callables does not support the autocast caching. Please set `cache_enabled=False`.FTr   zзModules must not have hooks registered at the time they are passed. However, registering hooks on modules after passing them through make_graphed_callables is allowed.c                 s   s   | ]}|j d kV  qdS )FNй┌requires_gradй┌.0┌br	   r	   r
   ┌	<genexpr>'  s     z)make_graphed_callables.<locals>.<genexpr>zЬIn any :class:`~torch.nn.Module` passed to :func:`~make_graphed_callables`, only parameters may be trainable. All buffers must have ``requires_grad=False``.c                 s   s   | ]}t |tjГV  qd S r   )┌
isinstancer&   ZTensor)r:   ┌argr	   r	   r
   r<   .  s     zfIn the beta API, sample_args for each callable must contain only Tensors. Other types are not allowed.c                 S   s   g | ]}t |ГСqS r	   )┌len)r:   ┌argsr	   r	   r
   ┌
<listcomp>5  s     z*make_graphed_callables.<locals>.<listcomp>c                 S   s*   g | ]"}t |tjjГr"t|аб Гnd СqS )r	   )r=   r&   ┌nn┌Module┌tuple┌
parameters)r:   ┌cr	   r	   r
   rA   6  s    c                    s   g | ]}И | И|  СqS r	   r	   йr:   ┌iйZflatten_sample_argsZper_callable_module_paramsr	   r
   rA   :  s    c                 S   s   g | ]}t jаб СqS r	   йr&   r'   r   йr:   ┌_r	   r	   r
   rA   ?  s     c                 S   s   g | ]}t jаб СqS r	   rJ   rK   r	   r	   r
   rA   @  s     c                 s   s   | ]}|j r|V  qd S r   r7   йr:   ┌or	   r	   r
   r<   O  s      c                 s   s   | ]}|j r|V  qd S r   r7   rG   r	   r	   r
   r<   P  s      c                 s   s   | ]}|j rtа|бV  qd S r   йr8   r&   Z
empty_likerM   r	   r	   r
   r<   Q  s     )┌outputs┌inputsZgrad_outputsZonly_inputsZallow_unused)r   c                 s   s"   | ]}|j rtа|бnd V  qd S r   rO   rM   r	   r	   r
   r<   t  s    c                 s   s   | ]}|j r|V  qd S r   r7   rM   r	   r	   r
   r<   z  s      c                 s   s   | ]}|j r|V  qd S r   r7   rG   r	   r	   r
   r<   {  s      c                 s   s   | ]}|d k	r|V  qd S r   r	   rM   r	   r	   r
   r<   |  s      r   Nc	           
         s8   G ЗЗЗЗЗЗЗ	fddДdt jjГЙ З ЗЗfddД}	|	S )Nc                       s@   e Zd ZeЗЗЗЗfddДГZeejjjЗ ЗЗfddДГГZ	dS )zOmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphedc                    s`   t ИГD ].}И| аб || аб krИ| а|| б qИ аб  tИtГsNtВtddД ИD ГГS )Nc                 s   s   | ]}|а б V  qd S r   й┌detachrM   r	   r	   r
   r<   к  s     zjmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.forward.<locals>.<genexpr>)┌range┌data_ptr┌copy_r   r=   rD   r)   )┌ctxrQ   rH   )┌	fwd_graph┌len_user_args┌static_input_surface┌static_outputsr	   r
   ┌forwardв  s    zWmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.forwardc                    sr   t |Гt ИГkstВtИ|ГD ]*\}}|d k	r|аб |аб kr|а|б qИ аб  tИtГs`tВtddД ИD ГГS )Nc                 s   s"   | ]}|d k	r|а б n|V  qd S r   rR   r9   r	   r	   r
   r<   ║  s    zkmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.backward.<locals>.<genexpr>)r?   r)   ┌ziprU   rV   r   r=   rD   )rW   Zgrads┌g┌grad)┌	bwd_graph┌static_grad_inputs┌static_grad_outputsr	   r
   ┌backwardм  s     zXmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.backwardN)
r   r    r!   ┌staticmethodr\   r&   ┌autograd┌functionZonce_differentiablerc   r	   )r`   rX   rY   ra   rb   rZ   r[   r	   r
   ┌Graphedб  s
   	rg   c                     s(   t | Г\}}И jt|ГИ О }t|ИГS r   )┌_tree_flatten┌applyrD   ┌_tree_unflatten)┌	user_argsZflatten_user_argsrL   ┌out)rg   ┌module_params┌output_unflatten_specr	   r
   ┌functionalized╛  s    zVmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.functionalized)r&   re   ┌Function)
rX   r`   rm   rY   rn   rZ   r[   rb   ra   ro   r	   )
rg   r`   rX   rY   rm   rn   ra   rb   rZ   r[   r
   ┌make_graphed_autograd_functionЦ  s    $z>make_graphed_callables.<locals>.make_graphed_autograd_functionc                    s   З ЗЗЗfddД}|S )Nc                     s   И j ИkrИ| О S И| О S d S r   )┌training)rk   й┌func┌graph_training_state┌graphed┌orig_fwdr	   r
   ┌new_fwd┌  s    
zEmake_graphed_callables.<locals>.make_graphed_forward.<locals>.new_fwdr	   )rt   ru   rv   rw   rx   r	   rs   r
   ┌make_graphed_forward┘  s    z4make_graphed_callables.<locals>.make_graphed_forward)!r&   Zis_autocast_enabledZis_autocast_cache_enabled┌RuntimeErrorr=   rD   r]   rB   rC   r?   Z_backward_hooksZ_forward_hooksZ_forward_pre_hooksr)   ┌all┌buffersrh   ┌appendrT   r   r'   r.   r*   r(   re   r_   r$   ┌reversedr8   ┌list┌	enumeraterr   r\   )%Z	callablesZsample_argsZnum_warmup_itersZallow_unused_inputZjust_one_callablerF   r@   Zflatten_argrL   Zper_callable_len_user_argsZ"per_callable_static_input_surfacesZ
fwd_graphsZ
bwd_graphsZmempoolrt   rZ   rP   Zgrad_inputsZper_callable_static_outputsZ"per_callable_output_unflatten_specrX   Zflatten_outputs┌specZ per_callable_static_grad_outputsZper_callable_static_grad_inputsr[   r`   rm   rb   ra   Zgrad_idxr>   rq   ┌retrH   rv   ry   r	   rI   r
   ┌make_graphed_callables╩   sю    E 
 ■¤√  ■
■
    ∙
	
№ √

3ўrГ   )r6   F)r/   r&   Ztorch.utils._pytreer   rh   r   rj   ┌_utilsr   ┌hasattrZ_C┌__dict__Ztorch._Cr   r   r   r   r   r   r$   rГ   r	   r	   r	   r
   ┌<module>   s"    
OL   