U
    9%eiW                     @   s  d dl Z d dlZd dlmZmZ d dlmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZ d dlmZ dddddd	d
dgZejeejgeej f dddZeje	eje	eef f eeejgeej f dddZejeeej  eej e	eef e	eje	eef f dddZedddZG dd deZejeeeeej  edddZG dd deZG dd
 d
eZ ejeeeedddZ!ejeeeeej  ed d!dZ"ejee# ed"d#d$Z$ejeeed%d&d'Z%ed(ddfejeeeeeeej   eeeej   ed)d*dZ&ej'ej(he&_)ej*he&_+e j,eee
d+ d,d-dZ-ejeejd.d/d	Z.ejeejd0d1d2Z/d9ejeeeej eej0 eeeejef d4d5d6Z1G d7d8 d8Z2dS ):    N)ABCabstractmethod)AnyCallablecastDict	GeneratorIterableOptionalSequenceSetTupleTypeUnionalways_wrap_policylambda_auto_wrap_policytransformer_auto_wrap_policysize_based_auto_wrap_policyenable_wrapwrapCustomPolicyModuleWrapPolicyroot_modulefnc                    s8   | ht jttt j d fdd  | dd dS )aQ  
    This applies ``fn`` to every module in the module tree of ``root_module``
    following a post-order traversal. If ``fn`` returns an :class:`nn.Module`,
    then this replaces the original module with the newly returned one in the
    tree. Otherwise, ``fn`` should return ``None``, in which case the module is
    not changed.
    )modulemodule_nameparent_modulec                    s   |   D ]&\}}|kr|  |||  q| }|d k	rt|tjs`td| d|  |srtd|  t|tjstd| t||| d S )Nz=Non-root modules should have their parent module set but got z for zTNon-root modules should have their module name set but got an empty module name for z.fn should return None or an nn.Module but got )named_childrenadd
isinstancennModuleAssertionErrorsetattr)r   r   r   Zchild_module_nameZchild_moduleZoptional_module_post_order_apply_innerr   Zvisited_modules Z/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/torch/distributed/fsdp/wrap.pyr&   7   s&    
 z2_post_order_apply.<locals>._post_order_apply_inner N)r!   r"   strr
   r   r'   r%   r(   _post_order_apply)   s    r+   )r   target_module_to_kwargsfsdp_fnreturnc                    s$   t jtt j d fdd}|S )z
    This constructs the "wrap" function to pass to :func:`_post_order_apply`
    based on ``target_module_to_kwargs``, which should be constructed from the
    wrapping policy.
    )r   r.   c                    s(   | kr$| k	r$|  } | f|S d S Nr'   )r   kwargsr-   r   r,   r'   r(   r   ]   s    z_construct_wrap_fn.<locals>.fn)r!   r"   r
   )r   r,   r-   r   r'   r1   r(   _construct_wrap_fnR   s     r2   )r   module_classesignored_modulesroot_kwargsr,   c                 C   sP   t t|}|  D ]6}||kr$qqt||r||kr>|||< d || d< q|S )NZmixed_precision)tuplesetmodulesr    )r   r3   r4   r5   r,   Zmodule_classes_tupler   r'   r'   r(   $_run_mixed_precision_override_policyh   s    
r9   r.   c                  O   s   dS )z
    A simple recursive wrap policy that always returns ``True``. This means
    that every submodule is wrapped by the wrapper class in
    :func:`_recursive_wrap`.
    Tr'   )argsr0   r'   r'   r(   r   |   s    c                	   @   sJ   e Zd ZdZeejeej ee	e
f eejee	e
f f dddZdS )_Policyzk
    This defines an abstract base class that represents a policy for applying
    a module-level API.
    r   r4   r5   r.   c                 C   s   dS )z
        This should return a dict ``target_module_to_kwargs`` that maps from
        each target module to wrap to its kwargs.
        Nr'   )selfr   r4   r5   r'   r'   r(   _run_policy   s    z_Policy._run_policyN)__name__
__module____qualname____doc__r   r!   r"   r   r   r*   r   r?   r'   r'   r'   r(   r<      s   
r<   )r   recursenonwrapped_numelr3   r.   c                 C   s   |rdS t | t|S )a   
    This auto wrap policy wraps every module that is an instance of any type in
    ``module_classes`` as its own FSDP instance. The root module given by
    ``module`` is always wrapped as an FSDP instance regardless. Since the
    wrapping proceeds bottom up, each FSDP instance manages the parameters in
    its subtree excluding any already managed by a child FSDP instance.

    Args:
        module (nn.Module): Current module being considered.
        recurse (bool): If ``False``, then this function must decide whether
            ``module`` should be wrapped as an FSDP instance or not. If
            ``True``, then the function is still recursing down the module
            tree as a part of the DFS.
        nonwrapped_numel (int): Parameter numel not yet wrapped.
        module_classes (Set[Type[nn.Module]]): Set of module classes that are
            wrapped as FSDP instances.

    Returns:
        ``True`` if ``recurse=True``, and whether ``module`` should be wrapped
        if ``recurse=False``.
    Tr    r6   )r   rD   rE   r3   r'   r'   r(   _module_wrap_policy   s    rG   c                       st   e Zd ZdZeeej  dddZeje	ej e
eef e
eje
eef f dddZed fd	d
Z  ZS )r   z{
    This policy applies to every module of the specified module classes,
    passing in the kwargs given to the root.
    )r3   c                 C   s   t |}|| _t|| _d S r/   )r7   _module_classesr*   _module_classes_str)r>   r3   Zmodule_classes_setr'   r'   r(   __init__   s    zModuleWrapPolicy.__init__r=   c                 C   sD   t | j}i }| D ](}||kr&qqt||rt|||< q|S r/   )r6   rH   r8   r    copy)r>   r   r4   r5   r3   r,   r   r'   r'   r(   r?      s    

zModuleWrapPolicy._run_policyr:   c                    s   t   d| j d S )N())super__repr__rI   r>   	__class__r'   r(   rO      s    zModuleWrapPolicy.__repr__)r@   rA   rB   rC   r	   r   r!   r"   rJ   r   r   r*   r   r?   rO   __classcell__r'   r'   rQ   r(   r      s   
c                   @   sp   e Zd ZdZeejgeee	e
ef f f dddZejeej e	e
ef e	eje	e
ef f dddZdS )	r   a  
    This policy takes in a lambda function that maps a given ``nn.Module`` to
    either ``False``, ``True``, or a kwarg dictionary.
    - If the function returns ``False`` or an empty dictionary, then the module
      does not have the API applied.
    - If the function returns ``True``, then the module has the API applied
      with the root's kwargs.
    - If the function returns a non-empty dictionary, then the module has the
      API applied, and the dictionary overrides the root's kwargs.

    Example::

        >>> # xdoctest: +SKIP("undefined variables")
        >>> model = init_transformer_model(...)
        >>> def lambda_fn(module: nn.Module):
        >>>     if module is model.lm_head:
        >>>         return {"sharding_strategy": ShardingStrategy.SHARD_GRAD_OP}
        >>>     elif isinstance(module, TransformerBlock):
        >>>         return True
        >>>     return False
        >>> policy = CustomPolicy(lambda_fn)
        >>> fsdp_model = FSDP(model, auto_wrap_policy=policy)
    )	lambda_fnc                 C   s
   || _ d S r/   )
_lambda_fn)r>   rT   r'   r'   r(   rJ      s    zCustomPolicy.__init__r=   c                 C   sr   i }|  D ]`}||krq| |}t|ttfs@td| |sFqt|}t|trd|| |||< q|S )Nz_The lambda_fn passed to CustomPolicy should return False/True or a kwarg dict, but it returned )r8   rU   r    dictbool
ValueErrorrK   update)r>   r   r4   r5   r,   r   resr0   r'   r'   r(   r?      s     




zCustomPolicy._run_policyN)r@   rA   rB   rC   r   r!   r"   r   rW   r   r*   r   rJ   r   r?   r'   r'   r'   r(   r      s   *
)r   rD   rE   rT   r.   c                 C   s   |rdS || S )aU  
    A convenient auto wrap policy to wrap submodules based on an arbitrary user
    function. If `lambda_fn(submodule) == True``, the submodule will be wrapped as
    a `wrapper_cls` unit.

    Return if a module should be wrapped during auto wrapping.

    The first three parameters are required by :func:`_recursive_wrap`.

    Args:
        module (nn.Module): Current module being considered.
        recurse (bool): If ``False``, then this function must decide whether
            ``module`` should be wrapped as an FSDP instance or not. If
            ``True``, then the function is still recursing down the module
            tree as a part of the DFS.
        nonwrapped_numel (int): Parameter numel not yet wrapped.

        lambda_fn (Callable[[nn.Module], bool]): If this returns ``True``, then
            this module will be wrapped.
    Tr'   )r   rD   rE   rT   r'   r'   r(   r     s    )r   rD   rE   transformer_layer_clsr.   c                 C   s   t | |||S )a-  
    See :func:`_module_wrap_policy`, where ``transformer_layer_cls`` is the
    same as ``module_classes``. Note that shared parameters must be wrapped in
    the same FSDP instance, so this auto wrap policy can help wrap shared
    embeddings into the same FSDP instance for transformer models.
    )rG   )r   rD   rE   r[   r'   r'   r(   r   +  s    )r   r3   rD   c                 O   s   |rdS t | t|S d S )NTrF   )r   r3   rD   r;   r0   r'   r'   r(   _wrap_module_cls_individually:  s    r\   )r   rD   rE   r.   c                    s   t  fdd|D S )zv
    A policy that wraps ``module`` if any policy in the passed in iterable of
    ``policies`` returns ``True``.
    c                 3   s   | ]}| d V  qdS )r   rD   rE   Nr'   ).0policyr   rE   rD   r'   r(   	<genexpr>P  s   z_or_policy.<locals>.<genexpr>)any)r   rD   rE   Zpoliciesr'   r`   r(   
_or_policyF  s    
rc   g    חA)r   rD   rE   min_num_paramsforce_leaf_modulesexclude_wrap_modulesr.   c                 C   s`   |dkrt jn|}|dkr t jn|}|}||k}|rH|oFt| t| S |oZt| t| S dS )a  
    A size-based auto wrap policy.

    Args:
        module (nn.Module): Current module being considered.
        recurse (bool): If ``False``, then this function must decide whether
            ``module`` should be wrapped as an FSDP instance or not. If
            ``True``, then the function is still recursing down the module
            tree as a part of the DFS.
        nonwrapped_numel (int): Parameter numel not yet wrapped.

        min_num_params (int): Customizable policy input that controls the size
            threshold over which a module is ready to be wrapped. This is in
            units of numel.
        force_leaf_modules (Set[Type[nn.Module]]): Set of module types to keep
            as leaves, i.e. their children will never be wrapped.
        exclude_wrap_modules (Set[Type[nn.Module]]): Set of module types to be
            excluded in wrapping.

    Returns:
        Whether ``module`` should be wrapped.
    N)r   FORCE_LEAF_MODULESEXCLUDE_WRAP_MODULESr    r6   )r   rD   rE   rd   re   rf   Zmin_nonwrapped_numelZis_larger'   r'   r(   r   V  s    !)NNN)wrapper_clswrapper_kwargsr.   c              	   k   s,   d| i|}t f | dV  W 5 Q R X dS )a  
    Context manager to wrap modules using a wrapper.

    Useful for when you'd like to apply the same configuration arguments to all
    child modules that you wrap. A particularly important use case is wrapping
    large layers so that they get sharded (in-place) during initialization, to
    avoid running out of system memory. Large layers can indicate that they
    should be sharded via the ``wrap`` annotation and this context manager can
    provide the exact configuration for these nested instances.

    Usage::

        with enable_wrap(wrapper_cls, **params):
            # Wraps layer in FSDP by default if within context
            self.l1 = wrap(torch.nn.Linear(5, 5))

    Args:
        wrapper_cls:
            Class that `wrap` annotation will `wrap` modules with, such as
            `FullyShardedDataParallel`.
        **wrapper_kwargs:
            Configuration settings that will be passed to all ``wrap``
            instances inside the context
    ri   N)_ConfigAutoWrap)ri   rj   r0   r'   r'   r(   r     s
    )r   wrap_overridesr.   c                 K   s2   t jr.t jdk	stt j|}t| t jf|S | S )a  
    Annotate that a module should be wrapped. Annotated modules will only be
    wrapped if inside of an :func:`enable_wrap` context manager. This allows
    a module to be initialized both with and without a wrapper without code
    change.

    The class that this function wraps the passed in ``nn.Module`` with is the
    passed in ``wrapper_cls`` argument into ``enable_wrap``. Both
    ``enable_wrap`` and ``wrap`` can take in kwargs specifying how to construct
    the ``wrapper_cls`` instance. In the case of duplicate kwargs in
    ``enable_wrap`` and ``wrap``, the argument passed into ``wrap`` will be
    respected.

    Usage::

        with enable_wrap(wrapper_cls=FSDP, **fsdp_config):
            # Wraps layer in FSDP by default if within context
            self.l1 = wrap(torch.nn.Linear(5, 5))

    Args:
        module (nn.Module): module to wrap (if in :func:`enable_wrap` context)
        **wrap_overrides: configuration overrides that will take priority over
            the values provided by the :func:`enable_wrap` context
    N)rk   in_autowrap_contextri   r#   r0   _wrap)r   rl   r'   r'   r(   r     s    
)r   ri   r.   c                 K   s8   |d k	st t| dr,|| j}|| f|S || f|S )N_wrap_overrides)r#   hasattrro   )r   ri   r0   Z	overridesr'   r'   r(   rn     s
    

rn   F)r   auto_wrap_policyri   r4   ignored_paramsonly_wrap_childrenr0   r.   c              	      s2  |dk	st d|dk	s t d|  D ]@\}}||kr:q(zt|tt|rPt W q( tk
rf   Y q(X q(t fdd|  D }	|dk	st || d|	dr*d}
|  D ]D\}}||krqt	f |||| d	|\}}t
| || |
|7 }
q|	|
 }|s"|| d
|dr"t| |f||	fS | |
fS | dfS )a  
    Wraps submodules of ``module`` for which ``auto_wrap_policy`` returns
    ``True`` with ``wrapper_cls``.

    Args:
        module (nn.Module): Module to recursively wrap.
        auto_wrap_policy (Callable): A callable representing a policy that
            determines which modules to recursively wrap with ``wrapper_cls``.
        ignored_modules (Set[torch.nn.Module]): Modules to ignore when
            wrapping.
        ignored_params (Set[torch.nn.Parameter]): Parameters to ignore when
            wrapping; these should be the parameters contained in the modules
            in ``ignored_modules``.
    Returns:
        (nn.Module, int):
            ``module`` after wrapping and the numel recursively wrapped.
    NzMust specify auto_wrap_policy.zMust specify wrapper_clsc                 3   s   | ]}| kr|  V  qd S r/   )Znumel)r^   prr   r'   r(   ra     s     z"_recursive_wrap.<locals>.<genexpr>Tr]   r   )r   rq   ri   r4   rr   F)r#   Znamed_modulesr    r   type	TypeErrorsum
parametersr   _recursive_wrapr$   rn   )r   rq   ri   r4   rr   rs   r0   _childrE   Ztotal_wrapped_numelnameZwrapped_childZnum_wrapped_params	remainderr'   ru   r(   rz     sL    

  rz   c                   @   s   e Zd ZU dZdZeed< dZee	 ed< i Z
eeef ed< eeef ddd	Zeedd
ddZeddddZddddZeeeddddZdS )rk   z
    Helper class to wrap modules based on default config args via a context manager.
    See :func:`enable_wrap` for more information.
    Frm   Nri   r0   r0   c                 K   s
   || _ d S r/   r   )r>   r0   r'   r'   r(   rJ   ;  s    z_ConfigAutoWrap.__init__)r0   r.   c                 C   sH   t jrtddt _d|  ks(tdtt| d t _| d= | t _d S )Nz]You are already within an autowrap context and we currently do not supported nested autowrap.Tri   z9Expected to pass in wrapper_cls arg into _ConfigAutoWrap.)	rk   rm   NotImplementedErrorkeysr#   r   r   ri   r0   r   r'   r'   r(   enable_autowrap_context>  s    
z'_ConfigAutoWrap.enable_autowrap_contextr:   c                   C   s   dt _d t _i t _d S )NF)rk   rm   ri   r0   r'   r'   r'   r(   disable_autowrap_contextN  s    z(_ConfigAutoWrap.disable_autowrap_contextc                 C   s   |  | j d S r/   )r   r0   rP   r'   r'   r(   	__enter__T  s    z_ConfigAutoWrap.__enter__)exc_typeexc_valexc_tbr.   c                 C   s   |    d S r/   )r   )r>   r   r   r   r'   r'   r(   __exit__W  s    z_ConfigAutoWrap.__exit__)r@   rA   rB   rC   rm   rW   __annotations__ri   r
   r   r0   r   r*   r   rJ   staticmethodr   r   r   r   r'   r'   r'   r(   rk   1  s   
rk   )F)3
contextlibrK   abcr   r   typingr   r   r   r   r   r	   r
   r   r   r   r   r   Ztorch.nnr!   __all__r"   r+   r*   r2   r9   rW   r   r<   intrG   r   r   r   r   rv   r\   rc   r   Z
ModuleListZ
ModuleDictrh   ZMultiheadAttentionrg   contextmanagerr   r   rn   	Parameterrz   rk   r'   r'   r'   r(   <module>   s   8*
	 8     7
 #% J