# Source: /var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/torch/utils/checkpoint.py
# (module constants and docstrings recovered from the CPython 3.8 bytecode cache)
import contextlib
import platform
import uuid
import warnings
import weakref
from collections import defaultdict
from typing import (
    Any,
    Callable,
    ContextManager,
    DefaultDict,
    Dict,
    Iterable,
    List,
    Optional,
    Tuple,
)
from weakref import ReferenceType

import torch
from torch.testing._internal.logging_tensor import LoggingTensorMode, capture_logs

__all__ = [
    "checkpoint",
    "checkpoint_sequential",
    "CheckpointError",
    "CheckpointFunction",
    "check_backward_validity",
    "detach_variable",
    "get_device_states",
    "set_device_states",
    "noop_context_fn",
    "set_checkpoint_early_stop",
    "DefaultDeviceType",
]

_DEFAULT_DETERMINISM_MODE = "default"


def detach_variable(inputs: Tuple[Any, ...]) -> Tuple[torch.Tensor, ...]:
    if isinstance(inputs, tuple):
        out = []
        for inp in inputs:
            if not isinstance(inp, torch.Tensor):
                out.append(inp)
                continue

            x = inp.detach()
            x.requires_grad = inp.requires_grad
            out.append(x)
        return tuple(out)
    else:
        raise RuntimeError(
            "Only tuple of tensors is supported. Got Unsupported input type: ",
            type(inputs).__name__,
        )


def check_backward_validity(inputs: Iterable[Any]) -> None:
    if not any(inp.requires_grad for inp in inputs if isinstance(inp, torch.Tensor)):
        warnings.warn(
            "None of the inputs have requires_grad=True. Gradients will be None"
        )


def _get_device_module(device="cuda"):
    device_module = getattr(torch, device)
    return device_module
class DefaultDeviceType:
    """
    A class that manages the default device type for checkpointing.
    If no non-CPU tensors are present, the default device type will
    be used. The default value is 'cuda'. The device type is used in
    the checkpointing process when determining which device states
    to save and restore for recomputation.
    """

    _default_device_type = "cuda"

    @staticmethod
    def set_device_type(device: str = "cuda"):
        """
        Set the default device type for checkpointing.

        Args:
            device (str): The device type to be set as default. Default is 'cuda'.
        """
        DefaultDeviceType._default_device_type = device

    @staticmethod
    def get_device_type() -> str:
        """
        Get the current default device type for checkpointing.

        Returns:
            str: The current default device type.
        """
        return DefaultDeviceType._default_device_type


def _infer_device_type(*args):
    device_types = list(
        {
            arg.device.type
            for arg in args
            if isinstance(arg, torch.Tensor) and arg.device.type != "cpu"
        }
    )
    if len(device_types) > 1:
        warnings.warn(
            "Tensor arguments, excluding CPU tensors, are detected on at least two "
            "types of devices. Device state will only be saved for devices of a "
            "single device type, and the remaining devices will be ignored. "
            "Consequently, if any checkpointed functions involve randomness, this "
            "may result in incorrect gradients. (Note that if CUDA devices are "
            "among the devices detected, it will be prioritized; otherwise, the "
            "first device encountered will be selected.)"
        )
    if len(device_types) == 0:
        return DefaultDeviceType.get_device_type()
    elif "cuda" in device_types:
        return "cuda"
    else:
        return device_types[0]


def get_device_states(*args) -> Tuple[List[int], List[torch.Tensor]]:
    # Collect the RNG state of every non-CPU device that appears in the inputs.
    fwd_device_ids = list(
        {
            arg.get_device()
            for arg in args
            if isinstance(arg, torch.Tensor) and arg.device.type != "cpu"
        }
    )

    fwd_device_states = []
    device_module = _get_device_module(_infer_device_type(*args))
    for device_id in fwd_device_ids:
        with device_module.device(device_id):
            fwd_device_states.append(device_module.get_rng_state())

    return fwd_device_ids, fwd_device_states


def set_device_states(devices, states) -> None:
    device_module = _get_device_module(_infer_device_type(*states))
    for device, state in zip(devices, states):
        with device_module.device(device):
            device_module.set_rng_state(state)


def _get_autocast_kwargs(device="cuda"):
    if device == "cuda":
        device_autocast_kwargs = {
            "enabled": torch.is_autocast_enabled(),
            "dtype": torch.get_autocast_gpu_dtype(),
            "cache_enabled": torch.is_autocast_cache_enabled(),
        }
    elif _supports_autocast(device):
        device_module = _get_device_module(device)
        device_autocast_kwargs = {
            "enabled": device_module.is_autocast_enabled(),
            "dtype": device_module.get_autocast_dtype(),
            "cache_enabled": torch.is_autocast_cache_enabled(),
        }
    else:
        device_autocast_kwargs = None

    cpu_autocast_kwargs = {
        "enabled": torch.is_autocast_cpu_enabled(),
        "dtype": torch.get_autocast_cpu_dtype(),
        "cache_enabled": torch.is_autocast_cache_enabled(),
    }

    return device_autocast_kwargs, cpu_autocast_kwargs


def _supports_autocast(device):
    device_module = _get_device_module(device)
    return device == "cuda" or (
        hasattr(device_module, "is_autocast_enabled")
        and hasattr(device_module, "get_autocast_dtype")
    )


class CheckpointFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, run_function, preserve_rng_state, *args):
        check_backward_validity(args)
        ctx.run_function = run_function
        ctx.preserve_rng_state = preserve_rng_state
        ctx.device = _infer_device_type(*args)
        ctx.device_autocast_kwargs, ctx.cpu_autocast_kwargs = _get_autocast_kwargs(
            ctx.device
        )
        if preserve_rng_state:
            ctx.fwd_cpu_state = torch.get_rng_state()
            # Only stash device RNG state if the device backend has already been
            # initialized, to avoid initializing it as a side effect.
            ctx.had_device_in_fwd = False
            device_module = _get_device_module(ctx.device)
            if getattr(device_module, "_initialized", False):
                ctx.had_device_in_fwd = True
                ctx.fwd_devices, ctx.fwd_device_states = get_device_states(*args)

        # Save non-tensor inputs directly on ctx; tensors go through
        # save_for_backward and are re-inserted by index in backward.
        ctx.inputs = []
        ctx.tensor_indices = []
        tensor_inputs = []
        for i, arg in enumerate(args):
            if torch.is_tensor(arg):
                tensor_inputs.append(arg)
                ctx.tensor_indices.append(i)
                ctx.inputs.append(None)
            else:
                ctx.inputs.append(arg)

        ctx.save_for_backward(*tensor_inputs)

        with torch.no_grad():
            outputs = run_function(*args)
        return outputs
zCheckpointFunction.forwardc                 G   s  t j stdt| j}| j}| j}t| j	}t
|D ]\}}|| ||< q:g }| jrf| jrf| j}t jj|| j| j	d | jrt | j | jrt| j| j tt|}	t| j	r|jjf | jnt }
t  > |
. t jjjf | j | j|	 }W 5 Q R X W 5 Q R X W 5 Q R X W 5 Q R X t |t j!r<|f}g }g }t"t#|D ]>}t $|| rP|| j%rP|&||  |&||  qPt#|dkrtdt j'|| tdd |	D }d| S )NzCheckpointing is not compatible with .grad() or when an `inputs` parameter is passed to .backward(). Please use .backward() and do not pass its `inputs` argument.rR   rT   Zdevice_typer   zInone of output has requires_grad=True, this checkpoint() is not necessaryc                 s   s$   | ]}t |tjr|jnd V  qd S r,   )r   r   r    Zgradr-   r*   r*   r+   r/   !  s   z.CheckpointFunction.backward.<locals>.<genexpr>)NN)(r   autogradZ_is_checkpoint_validr$   rJ   r   rc   saved_tensorsr8   r6   rd   r_   ra   rb   randomfork_rngrQ   r`   r   rO   r   r   rW   ampautocastrY   
contextlibnullcontextenable_gradrE   rZ   r^   r   r    rangerK   re   r#   r!   backward)rh   rL   r   rc   tensorsr7   ri   idxrng_devicesZdetached_inputsdevice_autocast_ctxrj   Zoutputs_with_gradZargs_with_gradZgradsr*   r*   r+   rw      sd    


  
2zCheckpointFunction.backwardN)r&   r?   r@   rB   rk   rw   r*   r*   r*   r+   r      s   
(c                   C   s   t  t  fS r,   )rs   rt   r*   r*   r*   r+   r   )  s    F)use_reentrant
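# Illustrative sketch (an addition for exposition, not part of the compiled
# module): a stripped-down version of the reentrant strategy implemented by
# CheckpointFunction above. The forward runs the wrapped callable under
# no_grad() so no intermediate activations are kept alive, and the backward
# recomputes the forward under enable_grad() to obtain input gradients.
# RNG/autocast state handling, non-tensor inputs, and multi-output support are
# deliberately omitted; the class name is hypothetical.
class _ToyRecomputeFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, run_function, x):
        ctx.run_function = run_function
        ctx.save_for_backward(x)
        with torch.no_grad():
            # No graph is recorded here, so intermediates are freed immediately.
            return run_function(x)

    @staticmethod
    def backward(ctx, grad_output):
        (x,) = ctx.saved_tensors
        detached = x.detach().requires_grad_(x.requires_grad)
        with torch.enable_grad():
            # Rebuild the local graph, then backprop the incoming gradient
            # through it.
            output = ctx.run_function(detached)
        torch.autograd.backward(output, grad_output)
        # One gradient per forward input: None for run_function, grad for x.
        return None, detached.grad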
@torch._disable_dynamo
def checkpoint(
    function,
    *args,
    use_reentrant: Optional[bool] = None,
    context_fn: Callable[[], Tuple[ContextManager, ContextManager]] = noop_context_fn,
    determinism_check: str = _DEFAULT_DETERMINISM_MODE,
    debug: bool = False,
    **kwargs
):
    r"""Checkpoint a model or part of the model

    Activation checkpointing is a technique that trades compute for memory.
    Instead of keeping tensors needed for backward alive until they are used in
    gradient computation during backward, forward computation in checkpointed
    regions omits saving tensors for backward and recomputes them during the
    backward pass. Activation checkpointing can be applied to any part of a
    model.

    There are currently two checkpointing implementations available, determined
    by the :attr:`use_reentrant` parameter. It is recommended that you use
    ``use_reentrant=False``. Please refer to the note below for a discussion of
    their differences.

    .. warning::

        If the :attr:`function` invocation during the backward pass differs
        from the forward pass, e.g., due to a global variable, the checkpointed
        version may not be equivalent, potentially causing an error to be
        raised or leading to silently incorrect gradients.

    .. warning::

        If you are using the ``use_reentrant=True`` variant (this is currently
        the default), please refer to the note below for important
        considerations and potential limitations.

    .. note::

        The reentrant variant of checkpoint (``use_reentrant=True``) and
        the non-reentrant variant of checkpoint (``use_reentrant=False``)
        differ in the following ways:

        * Non-reentrant checkpoint stops recomputation as soon as all needed
          intermediate activations have been recomputed. This feature is enabled
          by default, but can be disabled with :func:`set_checkpoint_early_stop`.
          Reentrant checkpoint always recomputes :attr:`function` in its
          entirety during the backward pass.

        * The reentrant variant does not record the autograd graph during the
          forward pass, as it runs with the forward pass under
          :func:`torch.no_grad`. The non-reentrant version does record the
          autograd graph, allowing one to perform backward on the graph within
          checkpointed regions.

        * The reentrant checkpoint only supports the
          :func:`torch.autograd.backward` API for the backward pass without its
          `inputs` argument, while the non-reentrant version supports all ways
          of performing the backward pass.

        * At least one input and output must have ``requires_grad=True`` for the
          reentrant variant. If this condition is unmet, the checkpointed part
          of the model will not have gradients. The non-reentrant version does
          not have this requirement.

        * The reentrant version does not consider tensors in nested structures
          (e.g., custom objects, lists, dicts, etc) as participating in
          autograd, while the non-reentrant version does.

        * The reentrant checkpoint does not support checkpointed regions with
          detached tensors from the computational graph, whereas the
          non-reentrant version does. For the reentrant variant, if the
          checkpointed segment contains tensors detached using ``detach()`` or
          with :func:`torch.no_grad`, the backward pass will raise an error.
          This is because ``checkpoint`` makes all the outputs require gradients
          and this causes issues when a tensor is defined to have no gradient in
          the model. To avoid this, detach the tensors outside of the
          ``checkpoint`` function.

    Args:
        function: describes what to run in the forward pass of the model or
            part of the model. It should also know how to handle the inputs
            passed as the tuple. For example, in LSTM, if user passes
            ``(activation, hidden)``, :attr:`function` should correctly use the
            first input as ``activation`` and the second input as ``hidden``
        preserve_rng_state(bool, optional):  If ``False``, omit stashing and
            restoring the RNG state during each checkpoint.
            Default: ``True``
        use_reentrant(bool, optional): Use checkpointing
            implementation that requires re-entrant autograd.
            If ``use_reentrant=False`` is specified, ``checkpoint`` will use an
            implementation that does not require re-entrant autograd. This
            allows ``checkpoint`` to support additional functionality, such as
            working as expected with ``torch.autograd.grad`` and support for
            keyword arguments input into the checkpointed function. Note that future
            versions of PyTorch will default to ``use_reentrant=False``.
            Default: ``True``
        context_fn(Callable, optional): A callable returning a tuple of two
            context managers. The function and its recomputation will be run
            under the first and second context managers respectively.
            This argument is only supported if ``use_reentrant=False``.
        determinism_check(str, optional): A string specifying the determinism
            check to perform. By default it is set to ``"default"`` which
            compares the shapes, dtypes, and devices of the recomputed tensors
            against those of the saved tensors. To turn off this check, specify
            ``"none"``. Currently these are the only two supported values.
            Please open an issue if you would like to see more determinism
            checks. This argument is only supported if ``use_reentrant=False``,
            if ``use_reentrant=True``, the determinism check is always disabled.
        debug(bool, optional): If ``True``, error messages will also include
            a trace of the operators run during the original forward computation
            as well as the recomputation. This argument is only supported if
            ``use_reentrant=False``.
        args: tuple containing inputs to the :attr:`function`

    Returns:
        Output of running :attr:`function` on :attr:`*args`
    """
    if use_reentrant is None:
        warnings.warn(
            "torch.utils.checkpoint: please pass in use_reentrant=True or "
            "use_reentrant=False explicitly. The default value of use_reentrant "
            "will be updated to be False in the future. To maintain current "
            "behavior, pass use_reentrant=True. It is recommended that you use "
            "use_reentrant=False. Refer to docs for more details on the "
            "differences between the two variants."
        )
        use_reentrant = True

    # preserve_rng_state is accepted as a keyword argument for both variants.
    preserve = kwargs.pop("preserve_rng_state", True)
    if kwargs and use_reentrant:
        raise ValueError(
            "Unexpected keyword arguments: " + ",".join(arg for arg in kwargs)
        )

    if use_reentrant:
        if context_fn is not noop_context_fn or debug is not False:
            raise ValueError(
                "Passing `context_fn` or `debug` is only supported when "
                "use_reentrant=False."
            )
        return CheckpointFunction.apply(function, preserve, *args)
    else:
        gen = _checkpoint_without_reentrant_generator(
            function, preserve, context_fn, determinism_check, debug, *args, **kwargs
        )
        # Runs pre-forward logic
        next(gen)
        ret = function(*args, **kwargs)
        # Runs post-forward logic
        try:
            next(gen)
        except StopIteration:
            return ret
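# Illustrative usage sketch (an addition for exposition, not part of the
# compiled module): wrapping part of a model with
# ``checkpoint(..., use_reentrant=False)``. The toy block, tensor shapes, and
# the ``_example_``-prefixed helper name are assumptions for demonstration.
def _example_checkpoint_usage():
    import torch.nn as nn

    block = nn.Sequential(nn.Linear(16, 16), nn.ReLU(), nn.Linear(16, 16))
    x = torch.randn(4, 16, requires_grad=True)

    # Activations inside `block` are not kept alive for backward; they are
    # recomputed when `y.sum().backward()` runs.
    y = checkpoint(block, x, use_reentrant=False)
    y.sum().backward()
    return x.grad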
def checkpoint_sequential(functions, segments, input, use_reentrant=True, **kwargs):
    r"""A helper function for checkpointing sequential models.

    Sequential models execute a list of modules/functions in order
    (sequentially). Therefore, we can divide such a model in various segments
    and checkpoint each segment. All segments except the last will not store
    the intermediate  activations. The inputs of each checkpointed segment will
    be saved for re-running the segment in the backward pass.

    .. warning::
        If you are using the ``use_reentrant=True`` variant (this is the
        default), please see :func:`~torch.utils.checkpoint.checkpoint` for
        the important considerations and limitations of this variant. It is
        recommended that you use ``use_reentrant=False``.

    .. warning::
        Since PyTorch 1.4, it allows only one Tensor as the input and
        intermediate outputs, just like :class:`torch.nn.Sequential`.

    Args:
        functions: A :class:`torch.nn.Sequential` or the list of modules or
            functions (comprising the model) to run sequentially.
        segments: Number of chunks to create in the model
        input: A Tensor that is input to :attr:`functions`
        preserve_rng_state(bool, optional):  If ``False``, omit stashing and
            restoring the RNG state during each checkpoint.
            Default: ``True``
        use_reentrant(bool, optional): Use checkpointing
            implementation that requires re-entrant autograd.
            If ``use_reentrant=False`` is specified, ``checkpoint`` will use an
            implementation that does not require re-entrant autograd. This
            allows ``checkpoint`` to support additional functionality, such as
            working as expected with ``torch.autograd.grad`` and support for
            keyword arguments input into the checkpointed function.
            Default: ``True``

    Returns:
        Output of running :attr:`functions` sequentially on :attr:`*inputs`

    Example:
        >>> # xdoctest: +SKIP("stub")
        >>> model = nn.Sequential(...)
        >>> input_var = checkpoint_sequential(model, chunks, input_var)
    r_   Tr   r   c                 s   s   | ]
}|V  qd S r,   r*   rF   r*   r*   r+   r/     s     z(checkpoint_sequential.<locals>.<genexpr>c                    s    fdd}|S )Nc                    s$   t  d D ]}| | } q| S )NrI   )rv   )inputjend	functionsstartr*   r+   rk     s    z<checkpoint_sequential.<locals>.run_function.<locals>.forwardr*   )r   r   r   rk   r*   r   r+   r^     s    z+checkpoint_sequential.<locals>.run_functionr   rI   )r|   r_   )r   r   r   r   r   nnZ
SequentialrJ   childrenrK   rv   r   )
r   segmentsr   r|   r   r   r^   Zsegment_sizer   r   r*   r*   r+   r     s&    -
c                 C   s   | st dd S )NzqSomething went unexpectedly wrong in activation checkpoint. Please report this bug by filing an issue to PyTorch.AssertionError)Zcondr*   r*   r+   _internal_assert  s    r   )enablec                 c   s   zt }| a dV  W 5 |a X dS )a-  Context manager that sets whether checkpoint should stop recomputation
    early.

    By default, non-reentrant checkpoint stops recomputation as soon as it
    has computed all needed Tensors. This context manager can be used to disable
    that feature if it is problematic for your specific application.

    This context manager only needs to be active when forward is run. It does
    not need to be active during backward.

    Example::

    >>> # xdoctest: +SKIP(failing)
    >>> message = "saved tensors default hooks are disabled"
    >>> with set_checkpoint_early_stop(False):
    ...     # Any checkpoint under this context manager will respect this
    ...     # context manager, even if its backward is performed outside.
    ...     out = checkpoint(fn, inputs)
    ...
    >>> out.backward()
    """
    global _enable_checkpoint_early_stop
    try:
        prev = _enable_checkpoint_early_stop
        _enable_checkpoint_early_stop = enable
        yield
    finally:
        _enable_checkpoint_early_stop = prev


class _Handle:
    pass


class _Holder:
    def __init__(self):
        self.handles: Dict[int, Optional[_Handle]] = dict()


class _NoopSaveInputs(torch.autograd.Function):
    # Autograd node whose only job is to hold on to the checkpoint's inputs so
    # they can be retrieved (via ctx.get_args) when recomputation is triggered.
    @staticmethod
    def forward(*args):
        return torch.empty((0,))

    @staticmethod
    def setup_context(ctx, inputs: Tuple[Any, ...], output: Any) -> None:
        # Only tensors can go through save_for_backward; everything else is
        # captured by the get_args closure stored directly on ctx.
        tensor_indices, tensors = zip(
            *[(i, o) for i, o in enumerate(inputs) if isinstance(o, torch.Tensor)]
        )
        idx2saved_idx = {b: a for a, b in enumerate(tensor_indices)}
        args = [None if isinstance(o, torch.Tensor) else o for o in inputs]

        def get_args(saved_tensors):
            ret = [
                saved_tensors[idx2saved_idx[i]] if i in tensor_indices else o
                for i, o in enumerate(args)
            ]
            # Drop the leading dummy tensor that was passed to apply().
            return ret[1:]

        ctx.get_args = get_args
        ctx.save_for_backward(*tensors)

    @staticmethod
    def backward(ctx, *grad_outputs):
        raise AssertionError("Did not expect to backward on this graph")


class _CheckpointFrame:
    def __init__(self, recompute_fn, early_stop, unpack_error_cb, metadata_fn):
        self.recompute_fn = recompute_fn
        self.input_saver = None
        self.weak_holders: List[ReferenceType] = []
        self.recomputed: DefaultDict[int, weakref.WeakKeyDictionary] = defaultdict(
            weakref.WeakKeyDictionary
        )
        self.recomp_counter: DefaultDict[int, int] = defaultdict(int)
        self.is_recomputed: DefaultDict[int, bool] = defaultdict(bool)

        self.early_stop = early_stop

        # Debugging / determinism checking
        self.metadata_fn = metadata_fn
        self.unpack_error_cb = unpack_error_cb
        self.x_metadatas = []
        self.forward_completed = False
        self.ignore_saved_mismatch = False

    def check_recomputed_tensors_match(self, gid):
        if self.ignore_saved_mismatch:
            return
        if not len(self.weak_holders) == self.recomp_counter[gid]:
            raise CheckpointError(
                "torch.utils.checkpoint: A different number of tensors was saved "
                "during the original forward and recomputation.\n"
                f"Number of tensors saved during forward: {len(self.weak_holders)}\n"
                f"Number of tensors saved during recomputation: "
                f"{self.recomp_counter[gid]}"
            )

        nb_meta_different = []
        for idx, weak_holder in enumerate(self.weak_holders):
            holder = weak_holder()
            if holder is None:
                continue

            _internal_assert(gid in holder.handles)
            _internal_assert(holder.handles[gid] is not None)
            _internal_assert(holder.handles[gid] in self.recomputed[gid])

            x_meta = self.x_metadatas[idx]
            recomputed_x = self.recomputed[gid][holder.handles[gid]]
            if x_meta != self.metadata_fn(recomputed_x):
                nb_meta_different.append((idx, x_meta, self.metadata_fn(recomputed_x)))

        if len(nb_meta_different) > 0:
            mismatched_tensors = ""
            for idx, x_meta, recomputed_meta in nb_meta_different:
                mismatched_tensors += (
                    f"tensor at position {idx}:\n"
                    f"saved metadata: {x_meta}\n"
                    f"recomputed metadata: {recomputed_meta}\n"
                )
            raise CheckpointError(
                "torch.utils.checkpoint: Recomputed values for the following "
                "tensors have different metadata than during the forward pass.\n"
                f"{mismatched_tensors}"
            )


_checkpoint_error_template = """\
An error happened while unpacking tensors; dumping logs of latest computation
because you passed `debug=True` to `torch.utils.checkpoint.checkpoint()`.
Scroll all the way down for guidance on how to navigate these logs.

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
|        1. Stack traces of the operators that ran in the original forward     |
+------------------------------------------------------------------------------+

{forward_traces}
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
|        2. Stack traces of the operators that ran during recomputation        |
+------------------------------------------------------------------------------+

{recompute_traces}
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
|       3. Log of operators in the original forward and recomputation          |
+------------------------------------------------------------------------------+
(Scroll up to correlate stack traces with each operation listed below. This
 helps identify their source in the code.)

IMPORTANT: Differences in "detach" calls between the original forward and the
           recomputation are expected. They are introduced by the checkpointing
           mechanism and can be ignored.

Operations executed during the original forward:

{forward_ops}

Operations executed during recomputation:

{recompute_ops}

+------------------------------------------------------------------------------+
 ERROR: Detected non-determinism while running activation checkpointing

 You are seeing this error because you passed `debug=True` to checkpoint and
 the tensors saved during the original forward differ from those saved during
 recomputation. This can happen if different operators were run in the
 original forward and in the recomputation.

 To identify where the mismatch may be coming from, you can do the following:

 1) Compare the operators run during the original forward and recomputation to
    see where they differ. These operators are printed above in the order they
    were executed.

 2) Review the stack trace for each operator to locate its invocation source.
    Each operator's stack trace is printed in their execution order.

 Note that the logs can be quite long. Here's how they are structured:
 (Tip: you can Ctrl-f for these headers)

 1. Stack traces of the operators that ran in the original forward
 2. Stack traces of the operators that ran during recomputation
 3. Log of operators in the original forward and recomputation
 4. Error message                                             <--- You are here
--------------------------------------------------------------------------------
"""


class CheckpointError(RuntimeError):
    pass


def _get_debug_context_and_cb() -> Tuple[
    Callable[[], Any], Callable[[CheckpointError], None]
]:
    # C++ stack traces are only collected on x86_64 Linux.
    cpp_tb = platform.machine() == "x86_64" and platform.system() == "Linux"

    class CaptureLogs:
        def __init__(self):
            self.logs = None
            self.tbs = None

        def get_context_manager(self):
            @contextlib.contextmanager
            def logging_mode():
                with LoggingTensorMode(), capture_logs(
                    True, python_tb=True, script_tb=True, cpp_tb=cpp_tb
                ) as logs_and_tb:
                    self.logs, self.tbs = logs_and_tb
                    yield logs_and_tb

            return logging_mode()

    capture_logs_fwd = CaptureLogs()
    capture_logs_recompute = CaptureLogs()

    def unpack_error_cb(e: CheckpointError):
        def get_str_tb(label, capture_logs):
            out = ""
            total_len = len(capture_logs.logs)
            for i, (log, tb) in enumerate(zip(capture_logs.logs, capture_logs.tbs)):
                out += f"{log}   ({i + 1} of {total_len} in {label})\n\n"
                found_torch_dispatch = False
                for line in tb:
                    # Only print frames below __torch_dispatch__.
                    is_torch_dispatch = line["name"] == "__torch_dispatch__"
                    if not found_torch_dispatch and not is_torch_dispatch:
                        continue
                    elif is_torch_dispatch:
                        found_torch_dispatch = True
                        continue
                    out += f"{line['filename']}:{line['line']}:{line['name']}\n"
                out += "\n\n"
            return out

        assert capture_logs_fwd.logs is not None
        assert capture_logs_recompute.logs is not None
        raise CheckpointError(
            _checkpoint_error_template.format(
                forward_traces=get_str_tb("original", capture_logs_fwd),
                recompute_traces=get_str_tb("recompute", capture_logs_recompute),
                forward_ops="\n".join(capture_logs_fwd.logs),
                recompute_ops="\n".join(capture_logs_recompute.logs),
            )
        ) from e

    def context_fn():
        return (
            capture_logs_fwd.get_context_manager(),
            capture_logs_recompute.get_context_manager(),
        )

    return context_fn, unpack_error_cb


def _default_meta_extractor(x: torch.Tensor) -> Dict[str, Any]:
    # These properties are cheap to compute and cover the common sources of
    # forward/recompute divergence.
    return {
        "shape": x.shape,
        "dtype": x.dtype,
        "device": x.device,
    }


_allowed_determinism_checks_to_fns: Dict[str, Callable[[torch.Tensor], Any]] = {
    _DEFAULT_DETERMINISM_MODE: _default_meta_extractor,
    "none": lambda _: None,
}


class _StopRecomputationError(Exception):
    pass


class _recomputation_hook(torch.autograd.graph.saved_tensors_hooks):
    def __init__(self, target_frame_ref: ReferenceType, gid: int):
        def pack_hook(x):
            target_frame = target_frame_ref()
            assert target_frame is not None
            recomp_idx = target_frame.recomp_counter[gid]
            target_frame.recomp_counter[gid] += 1

            if recomp_idx >= len(target_frame.weak_holders):
                assert not target_frame.early_stop
                if not target_frame.forward_completed:
                    # grad() was called inside the checkpointed region; skip the
                    # saved-tensor count check later.
                    target_frame.ignore_saved_mismatch = True
                    return x.detach()
                raise CheckpointError(
                    "torch.utils.checkpoint: trying to save more tensors during "
                    "recomputation than during the original forward pass."
                )

            holder = target_frame.weak_holders[recomp_idx]()

            # The holder may have been cleared if its tensor was already freed.
            if holder is not None:
                _internal_assert(holder.handles.get(gid, None) is None)
                holder.handles[gid] = _Handle()
                target_frame.recomputed[gid][holder.handles[gid]] = x.detach()

            if target_frame.early_stop and target_frame.recomp_counter[gid] == len(
                target_frame.weak_holders
            ):
                # Everything needed has been recomputed; abort the recomputation.
                raise _StopRecomputationError()
            return x.detach()

        def unpack_hook(x):
            return x

        super().__init__(pack_hook, unpack_hook)


class _checkpoint_hook(torch.autograd.graph.saved_tensors_hooks):
    def __init__(self, frame):
        def pack_hook(x):
            # Instead of saving the tensor, save a holder and remember the
            # metadata needed for the determinism check.
            holder = _Holder()
            frame.weak_holders.append(weakref.ref(holder))
            if frame.metadata_fn is not None:
                with torch.no_grad():
                    frame.x_metadatas.append(frame.metadata_fn(x))
            return holder

        def unpack_hook(holder):
            gid = torch._C._current_graph_task_id()
            if gid == -1:
                # Unpack is happening outside of a backward call; generate a
                # temporary id.
                gid = int(uuid.uuid4())

            if not frame.is_recomputed[gid]:
                ctx = frame.input_saver.grad_fn
                args = ctx.get_args(ctx.saved_tensors)

                try:
                    with _recomputation_hook(
                        weakref.ref(frame), gid
                    ), torch.autograd.enable_grad():
                        frame.recompute_fn(*args)
                except _StopRecomputationError:
                    pass
                frame.is_recomputed[gid] = True
                frame.check_recomputed_tensors_match(gid)

            _internal_assert(gid in holder.handles)

            if holder.handles[gid] is None:
                raise CheckpointError(
                    "torch.utils.checkpoint: Unpack is being triggered for a tensor "
                    "that was already unpacked once. If you are calling "
                    "ctx.saved_tensors in backward, make sure to do so only once. "
                    "Otherwise please open an issue with details on your use case."
                )
            _internal_assert(holder.handles[gid] in frame.recomputed[gid])
            ret = frame.recomputed[gid][holder.handles[gid]]
            holder.handles[gid] = None
            return ret

        if frame.unpack_error_cb is not None:

            def unpack_hook_with_error_cb(holder):
                try:
                    return unpack_hook(holder)
                except CheckpointError as e:
                    frame.unpack_error_cb(e)

            super().__init__(pack_hook, unpack_hook_with_error_cb)
        else:
            super().__init__(pack_hook, unpack_hook)


def _checkpoint_without_reentrant_generator(
    fn,
    preserve_rng_state=True,
    context_fn: Callable[[], Tuple[ContextManager, ContextManager]] = noop_context_fn,
    determinism_check: str = _DEFAULT_DETERMINISM_MODE,
    debug: bool = False,
    *args,
    **kwargs,
):
    """Checkpointing without reentrant autograd
    Args:
        function: describes what to run in the forward pass of the model or
            part of the model. It should also know how to handle the inputs
            passed as the tuple. For example, in LSTM, if user passes
            ``(activation, hidden)``, :attr:`function` should correctly use the
            first input as ``activation`` and the second input as ``hidden``
        preserve_rng_state(bool, optional):  If ``False``, omit stashing and
            restoring the RNG state during each checkpoint.
            Default: ``True``
        context_fn(Callable, optional): A callable returning a tuple of two
            context managers. The function and its recomputation will be run
            under the first and second context managers respectively.
        determinism_check(str, optional): A string specifying the determinism
            check to perform. By default it is set to ``"default"`` which
            compares the shapes, dtypes, and devices of the recomputed tensors
            against those the saved tensors. To turn off this check, specify
            ``"none"``. Currently these are the only two supported values.
            Please open an issue if you would like to see more determinism
            checks.
        debug(bool, optional): If ``True``, error messages will also include
            a trace of the operators run during the original forward computation
            as well as the recomputation.
        *args: Arguments to pass in to the given ``function``.
        **kwargs: Keyword arguments to pass into the given ``function``.
    Nz6debug=True is incompatible with non-default context_fnz#determinism_check should be one of z
, but got r9   Fr]   Tc                     s   | ^}}g }	rr}t jj|	d 	rHt  rHt tr^jjf nt	 }|< t j
jjf    
 || W 5 Q R X W 5 Q R X W 5 Q R X W 5 Q R X d S )Nrl   )r   ro   rp   rQ   r   rW   rq   rr   rs   rt   rE   )r   r   rL   rz   r{   rZ   r6   rY   r7   fnr`   rO   rb   ra   r_   Zrecompute_contextr*   r+   r     s.      


z=_checkpoint_without_reentrant_generator.<locals>.recompute_fnr   )r#   zPyTorch's device state was initialized in the forward pass of a Checkpoint, which is not allowed. Please open an issue if you need this feature.)r   r   r   r   rJ   keysrM   r8   r[   r   rN   r4   r   r   r   r   r   r   r   r  r   r   r$   )r  r_   r}   r~   r   rL   r   r   r   Zforward_contextr   Z	new_framedummyr*   r  r+   r   P  s\    #


 r   )r3   )r3   )T)Brs   r   r1   r   collectionsr   typingr   r   r   r   r   r   r	   r
   r   r   Z&torch.testing._internal.logging_tensorr   r   r   r   __all__Z_DEFAULT_DETERMINISM_MODEr    r   r   r8   r   rM   r   r   r   r[   rW   rm   Functionr   r   Z_disable_dynamor   rC   r   r   r   r   r   r   r   r   r   r   r   r$   r   r   r   r   __annotations__	Exceptionr   graphZsaved_tensors_hooksr   r   r   r*   r*   r*   r+   <module>   s    ,"
 
j 
L %_<(;	   /=
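# Illustrative usage sketch (an addition for exposition, not part of the
# compiled module): combining the non-reentrant options documented above.
# `context_fn` must return a pair of context managers, applied to the original
# forward and to the recomputation respectively; plain nullcontext() pairs are
# used here, mirroring noop_context_fn. The `_example_`-prefixed helper name
# and the toy shapes are assumptions for demonstration.
def _example_nonreentrant_options():
    import torch.nn as nn

    def paired_context_fn():
        return contextlib.nullcontext(), contextlib.nullcontext()

    block = nn.Sequential(nn.Linear(8, 8), nn.GELU(), nn.Linear(8, 8))
    x = torch.randn(2, 8, requires_grad=True)

    out = checkpoint(
        block,
        x,
        use_reentrant=False,
        context_fn=paired_context_fn,
        determinism_check="none",  # skip the shape/dtype/device comparison
        debug=False,
    )
    out.mean().backward()
    return x.grad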