U
    0-e%                     @   s   d dl Z d dlZd dlZddlmZmZ ddlmZmZm	Z	 e	ddrXd dl
m  mZ dd ZG d	d
 d
ejjZedddZdS )    N   )AcceleratorStateGradientState)DistributedType
honor_typeis_tpu_availableF)Zcheck_devicec                    sh   t | ttfr&t|  fdd| D S t | trNt|  fdd|  D S t | tjrd| 	 S | S )Nc                 3   s   | ]}t | V  qd S Nmove_to_device).0tdevice U/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/accelerate/optimizer.py	<genexpr>   s     z!move_to_device.<locals>.<genexpr>c                    s   i | ]\}}|t | qS r   r	   r   kvr   r   r   
<dictcomp>    s      z"move_to_device.<locals>.<dictcomp>)

isinstancelisttupler   dicttypeitemstorchZTensorto)stater   r   r   r   r
      s    

r
   c                   @   s   e Zd ZdZd#ddZedd Zejdd Zed	d
 Zejdd
 Zedd Z	e	jdd Z	dd Z
dd Zdd Zd$ddZd%ddZdd Zedd Zedd Zdd  Zd!d" ZdS )&AcceleratedOptimizera  
    Internal wrapper around a torch optimizer.

    Conditionally will perform `step` and `zero_grad` if gradients should be synchronized when performing gradient
    accumulation.

    Args:
        optimizer (`torch.optim.optimizer.Optimizer`):
            The optimizer to wrap.
        device_placement (`bool`, *optional*, defaults to `True`):
            Whether or not the optimizer should handle device placement. If so, it will place the state dictionary of
            `optimizer` on the right device.
        scaler (`torch.cuda.amp.grad_scaler.GradScaler`, *optional*):
            The scaler to use in the step function if training with mixed precision.
    TNc                 C   s   || _ || _t | _t | _|| _d| _| jd k	rRd| _| j j	| _
t| | j j	| _|r| j  }| jjtjkrt|| jj nt|| jj}| j | d S NF)	optimizerscalerr   accelerator_stater   gradient_statedevice_placement_is_overflow_accelerate_step_calledstep_optimizer_original_step_methodpatch_optimizer_step_optimizer_patched_step_method
state_dictdistributed_typer   TPUxmsend_cpu_data_to_devicer   r
   load_state_dict)selfr!   r%   r"   r,   r   r   r   __init__7   s     


zAcceleratedOptimizer.__init__c                 C   s   | j jS r   r!   r   r2   r   r   r   r   M   s    zAcceleratedOptimizer.statec                 C   s   || j _d S r   r4   r2   r   r   r   r   r   Q   s    c                 C   s   | j jS r   r!   param_groupsr5   r   r   r   r8   U   s    z!AcceleratedOptimizer.param_groupsc                 C   s   || j _d S r   r7   )r2   r8   r   r   r   r8   Y   s    c                 C   s   | j jS r   r!   defaultsr5   r   r   r   r:   ]   s    zAcceleratedOptimizer.defaultsc                 C   s   || j _d S r   r9   )r2   r:   r   r   r   r:   a   s    c                 C   s   | j | d S r   )r!   add_param_group)r2   param_groupr   r   r   r;   e   s    z$AcceleratedOptimizer.add_param_groupc                 C   s4   | j jtjkr$| jr$t|| j j | j	| d S r   )
r#   r-   r   r.   r%   r/   r0   r   r!   r1   )r2   r,   r   r   r   r1   h   s    z$AcceleratedOptimizer.load_state_dictc                 C   s
   | j  S r   )r!   r,   r5   r   r   r   r,   m   s    zAcceleratedOptimizer.state_dictc                 C   sZ   | j jrVdt| jjjk}|r<|d kr,d}| jj|d n|d k	rLtd| j  d S )Nset_to_noneF)r=   zJ`set_to_none` for Optimizer.zero_grad` is not supported by this optimizer.)r$   sync_gradientsinspect	signaturer!   	zero_grad
parameters
ValueError)r2   r=   Z
accept_argr   r   r   rA   p   s    zAcceleratedOptimizer.zero_gradc                 C   s   | j jr| jjtjkr<|d k	r&d|ini }tj| j|d n`| j	d k	r| j
| j_| j	| j| | j	  | jsxd| _nd| _| j| j_d| _n| j| d S )Nclosure)optimizer_argsTF)r$   r>   r#   r-   r   r.   r/   Zoptimizer_stepr!   r"   r+   r(   updater'   r&   r)   )r2   rD   rE   r   r   r   r(   |   s    



zAcceleratedOptimizer.stepc                    s,   | j jD ]} fdd|d D |d< qd S )Nc                    s   g | ]}  ||qS r   )get)r   pparameters_mapr   r   
<listcomp>   s     z;AcceleratedOptimizer._switch_parameters.<locals>.<listcomp>paramsr7   )r2   rJ   r<   r   rI   r   _switch_parameters   s    z'AcceleratedOptimizer._switch_parametersc                 C   s   t dt | jS )zTWhether or not the optimizer step was done, or skipped because of gradient overflow.zThe `is_overflow` property is deprecated and will be removed in version 1.0 of Accelerate use `optimizer.step_was_skipped` instead.)warningswarnFutureWarningr&   r5   r   r   r   is_overflow   s
    z AcceleratedOptimizer.is_overflowc                 C   s   | j S )z.Whether or not the optimizer step was skipped.)r&   r5   r   r   r   step_was_skipped   s    z%AcceleratedOptimizer.step_was_skippedc                    s"   dddg  fdd| j  D S )Nr'   r)   r+   c                    s   i | ]\}}| kr||qS r   r   r   Z_ignored_keysr   r   r      s       z5AcceleratedOptimizer.__getstate__.<locals>.<dictcomp>)__dict__r   r5   r   rS   r   __getstate__   s
    z!AcceleratedOptimizer.__getstate__c                 C   s:   | j | | jd k	r6d| _| jj| _t| | jj| _d S r    )	rT   rF   r"   r'   r!   r(   r)   r*   r+   r6   r   r   r   __setstate__   s
    

z!AcceleratedOptimizer.__setstate__)TN)N)N)__name__
__module____qualname____doc__r3   propertyr   setterr8   r:   r;   r1   r,   rA   r(   rM   rQ   rR   rU   rV   r   r   r   r   r   &   s4   









	
r   )accelerated_optimizerc                    s    fdd}|S )Nc                     s   d _ | |S )NT)r'   )argskwargsr]   methodr   r   patched_step   s    z*patch_optimizer_step.<locals>.patched_stepr   )r]   ra   rb   r   r`   r   r*      s    r*   )r?   rN   r   r   r   r   utilsr   r   r   Ztorch_xla.core.xla_modelcoreZ	xla_modelr/   r
   ZoptimZ	Optimizerr   r*   r   r   r   r   <module>   s   

 