import base64
import io
import json
import os
from copy import deepcopy

from ..optimizer import AcceleratedOptimizer
from ..scheduler import AcceleratedScheduler


class HfDeepSpeedConfig:
    """
    This object contains a DeepSpeed configuration dictionary and can be quickly queried for things like zero stage.

    A `weakref` of this object is stored in the module's globals to be able to access the config from areas where
    things like the Trainer object is not available (e.g. `from_pretrained` and `_get_resized_embeddings`). Therefore
    it's important that this object remains alive while the program is still running.

    [`Trainer`] uses the `HfTrainerDeepSpeedConfig` subclass instead. That subclass has logic to sync the configuration
    with values of [`TrainingArguments`] by replacing special placeholder values: `"auto"`. Without this special logic
    the DeepSpeed configuration is not modified in any way.

    Args:
        config_file_or_dict (`Union[str, Dict]`):
            Path to a DeepSpeed config file, an already-loaded config dict, or a base64-encoded string of the
            config's JSON.
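
    Example (an illustrative sketch; the config below is a minimal hand-written ZeRO-3 snippet, not a full
    DeepSpeed config):

    ```python
    ds_config = {"zero_optimization": {"stage": 3, "offload_param": {"device": "cpu"}}}
    hf_ds_config = HfDeepSpeedConfig(ds_config)
    hf_ds_config.get_value("zero_optimization.stage")  # 3
    hf_ds_config.is_zero3()  # True
    hf_ds_config.is_offload()  # True, since a valid offload device ("cpu") is configured
    ```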

    """

    def __init__(self, config_file_or_dict):
        if isinstance(config_file_or_dict, dict):
            # Don't modify user's data should they want to reuse it, because once we modify it, `auto`
            # values will have been overridden and the config will not be accepted here again.
            config = deepcopy(config_file_or_dict)
        elif os.path.exists(config_file_or_dict):
            with io.open(config_file_or_dict, "r", encoding="utf-8") as f:
                config = json.load(f)
        else:
            try:
                config_decoded = base64.urlsafe_b64decode(config_file_or_dict).decode("utf-8")
                config = json.loads(config_decoded)
            except (UnicodeDecodeError, AttributeError, ValueError):
                raise ValueError(
                    f"Expected a string path to an existing deepspeed config, or a dictionary, or a base64 encoded"
                    f" string. Received: {config_file_or_dict}"
                )

        self.config = config

        self.set_stage_and_offload()

    def set_stage_and_offload(self):
        # zero stage - this is done as early as possible, before the model is created
        self._stage = self.get_value("zero_optimization.stage", -1)

        # offload: enabled if either the optimizer or the params are offloaded to a valid device
        self._offload = False
        if self.is_zero2() or self.is_zero3():
            offload_devices_valid = set(["cpu", "nvme"])
            offload_devices = set(
                [
                    self.get_value("zero_optimization.offload_optimizer.device"),
                    self.get_value("zero_optimization.offload_param.device"),
                ]
            )
            if len(offload_devices & offload_devices_valid) > 0:
                self._offload = True

    def find_config_node(self, ds_key_long):
        config = self.config

        # find the config node of interest if it exists
        nodes = ds_key_long.split(".")
        ds_key = nodes.pop()
        for node in nodes:
            config = config.get(node)
            if config is None:
                return None, ds_key

        return config, ds_key

    def get_value(self, ds_key_long, default=None):
        """
        Returns the set value or `default` if no value is set
        """
        config, ds_key = self.find_config_node(ds_key_long)
        if config is None:
            return default
        return config.get(ds_key, default)

    def del_config_sub_tree(self, ds_key_long, must_exist=False):
        """
        Deletes a sub-section of the config file if it's found.

        Unless `must_exist` is `True` the section doesn't have to exist.
        """
        config = self.config

        # find the config node of interest if it exists
        nodes = ds_key_long.split(".")
        for node in nodes:
            parent_config = config
            config = config.get(node)
            if config is None:
                if must_exist:
                    raise ValueError(f"Can't find {ds_key_long} entry in the config: {self.config}")
                else:
                    return

        # if found remove it
        if parent_config is not None:
            parent_config.pop(node)

    def is_true(self, ds_key_long):
        """
        Returns `True`/`False` only if the value is set, always `False` otherwise. So use this method to ask the very
        specific question of whether the value is set to `True` (and it's not set to `False` or isn't set).

        """
        value = self.get_value(ds_key_long)
        return False if value is None else bool(value)

    def is_false(self, ds_key_long):
        """
        Returns `True`/`False` only if the value is set, always `False` otherwise. So use this method to ask the very
        specific question of whether the value is set to `False` (and it's not set to `True` or isn't set).
        """
        value = self.get_value(ds_key_long)
        return False if value is None else not bool(value)

    def is_zero2(self):
        return self._stage == 2

    def is_zero3(self):
        return self._stage == 3

    def is_offload(self):
        return self._offload


class DeepSpeedEngineWrapper:
    """
    Internal wrapper for deepspeed.runtime.engine.DeepSpeedEngine. This is used to follow a conventional training
    loop.

    Args:
        engine (deepspeed.runtime.engine.DeepSpeedEngine): deepspeed engine to wrap
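
    Example (an illustrative sketch; `engine` is assumed to be the engine returned by `deepspeed.initialize`,
    and the forward pass producing `loss` is elided):

    ```python
    engine_wrapper = DeepSpeedEngineWrapper(engine)
    engine_wrapper.backward(loss)  # runs engine.backward(loss) followed by engine.step()
    ```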
    """

    def __init__(self, engine):
        self.engine = engine

    def backward(self, loss, **kwargs):
        # runs backpropagation and handles mixed precision
        self.engine.backward(loss, **kwargs)

        # DeepSpeed's `engine.step` performs the gradient accumulation check, gradient clipping, the optimizer
        # step, zero_grad, overflow checking, and the lr_scheduler step (if engine.lr_scheduler is not None)
        self.engine.step()


class DeepSpeedOptimizerWrapper(AcceleratedOptimizer):
    """
    Internal wrapper around a deepspeed optimizer.

    Args:
        optimizer (`torch.optim.optimizer.Optimizer`):
            The optimizer to wrap.
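
    Example (an illustrative sketch; `optimizer` is assumed to be the wrapped optimizer returned by
    `Accelerator.prepare` under a DeepSpeed plugin, and `accelerator`/`loss` come from a standard training loop):

    ```python
    accelerator.backward(loss)  # under DeepSpeed this also performs the optimizer step
    if optimizer.step_was_skipped:
        print("optimizer step was skipped because of gradient overflow")
    ```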
    """

    def __init__(self, optimizer):
        super().__init__(optimizer, device_placement=False, scaler=None)
        self.__has_overflow__ = hasattr(self.optimizer, "overflow")

    def zero_grad(self, set_to_none=None):
        pass  # `accelerator.backward(loss)` is doing that automatically. Therefore, its implementation is not needed

    def step(self):
        pass  # `accelerator.backward(loss)` is doing that automatically. Therefore, its implementation is not needed

    @property
    def step_was_skipped(self):
        """Whether or not the optimizer step was done, or skipped because of gradient overflow."""
        if self.__has_overflow__:
            return self.optimizer.overflow
        return False


class DeepSpeedSchedulerWrapper(AcceleratedScheduler):
    """
    Internal wrapper around a deepspeed scheduler.

    Args:
        scheduler (`torch.optim.lr_scheduler.LambdaLR`):
            The scheduler to wrap.
        optimizers (one or a list of `torch.optim.Optimizer`):
            The wrapped optimizer(s) the scheduler is tied to.
    """

    def __init__(self, scheduler, optimizers):
        super().__init__(scheduler, optimizers)

    def step(self):
        pass  # `accelerator.backward(loss)` is doing that automatically. Therefore, its implementation is not needed


class DummyOptim:
    """
    Dummy optimizer that holds model parameters or param groups. It is primarily used to follow a conventional
    training loop when the optimizer config is specified in the DeepSpeed config file.

    Args:
        params (iterable):
            Iterable of parameters to optimize or dicts defining parameter groups.
        lr (float, *optional*, defaults to 0.001):
            Learning rate.
        weight_decay (float, *optional*, defaults to 0):
            Weight decay.
        **kwargs:
            Other arguments.
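
    Example (an illustrative sketch; assumes the `optimizer` section is defined in the DeepSpeed config file,
    and `model` is a hypothetical `torch.nn.Module`):

    ```python
    optimizer = DummyOptim(params=model.parameters(), lr=1e-3)
    ```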
    """

    def __init__(self, params, lr=0.001, weight_decay=0, **kwargs):
        self.params = params
        self.lr = lr
        self.weight_decay = weight_decay
        self.kwargs = kwargs


class DummyScheduler:
    """
    Dummy scheduler that holds an optimizer. It is primarily used to follow a conventional training loop when the
    scheduler config is specified in the DeepSpeed config file.

    Args:
        optimizer (`torch.optim.optimizer.Optimizer`):
            The optimizer to wrap.
        total_num_steps (int, *optional*):
            Total number of steps.
        warmup_num_steps (int, *optional*):
            Number of steps for warmup.
        lr_scheduler_callable (callable, *optional*):
            A callable function that creates an LR Scheduler. It accepts only one argument `optimizer`.
        **kwargs:
            Other arguments.
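
    Example (an illustrative sketch; assumes a `scheduler` section is defined in the DeepSpeed config file and
    `optimizer` is the `DummyOptim` from above):

    ```python
    lr_scheduler = DummyScheduler(optimizer, total_num_steps=1000, warmup_num_steps=100)
    ```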
    """

    def __init__(self, optimizer, total_num_steps=None, warmup_num_steps=0, lr_scheduler_callable=None, **kwargs):
        self.optimizer = optimizer
        self.total_num_steps = total_num_steps
        self.warmup_num_steps = warmup_num_steps
        self.lr_scheduler_callable = lr_scheduler_callable
        self.kwargs = kwargs