U
    *-eg                     @   sj   U d dl mZmZmZmZ d dlZd dlm  mZ	 d dlm
Z
 g Zee ed< ejjG dd dZdS )    )DictListOptionalTupleN)Tensor__all__c                   @   sd   e Zd Zdee eeeef eeeeeeed
ddZee	e d	d
dZ
ee	e  dddZdS )_FunctionalAdamMbP?g?g+?:0yE>        F)
paramslrbetasepsweight_decayamsgradmaximizeforeachfused_allow_empty_param_listc                 C   s  d|kst d| d|ks,t d| d|d   krDdk sXn t d|d  d|d   krpdk sn t d|d  d|kst d	| |||d |d |d
| _|| _|| _|| _|	| _tjt	tj
t	ttj
f f i | _t|dkr|
st dd|i| _d S )Nr   zInvalid learning rate: zInvalid epsilon value: r   g      ?z#Invalid beta parameter at index 0:    z#Invalid beta parameter at index 1: zInvalid weight_decay value: )r   r   beta1beta2r   z%optimizer got an empty parameter listr   )
ValueErrordefaultsr   r   r   r   torchjitZannotater   r   strstatelenparam_group)selfr   r   r   r   r   r   r   r   r   r    r#   h/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/torch/distributed/optim/functional_adam.py__init__   s0    $z_FunctionalAdam.__init__)paramgradc           
      C   sT  g }g }g }g }g }g }|dk	r4| | | | || jkri | j|< | j| }	td|	d< tj|tjd|	d< tj|tjd|	d< | jrtj|tjd|	d< | j| }	| |	d  | |	d  | jr| |	d  | |	d  t X tj	||||||| j| j
| jd | jd	 | jd
 | jd | jd | j| jddd W 5 Q R X dS )zo
        Similar to step, but operates on a single parameter and optionally a
        gradient tensor.
        Nr   stepZmemory_formatexp_avg
exp_avg_sqmax_exp_avg_sqr   r   r   r   r   r   r   r   r   r   r   r   r   r   Z
grad_scaleZ	found_inf)appendr   r   tensor
zeros_likepreserve_formatr   no_gradFadamr   r   r   r   )
r"   r&   r'   params_with_gradgradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepsr   r#   r#   r$   
step_paramA   sh    




 
 
 


z_FunctionalAdam.step_param)	gradientsc                 C   s  | j d }g }g }g }g }g }g }t|t|krXtddt| d dt|  t| j d |D ]\}	}
|
d k	rh||	 ||
 |	| jkri | j|	< | j|	 }td|d< tj|	tj	d|d	< tj|	tj	d|d
< | j
rtj|	tj	d|d< | j|	 }||d	  ||d
  | j
r6||d  ||d  qht X tj||||||| j
| j| jd | jd | jd | jd | jd | j| jd d d W 5 Q R X d S )Nr   zEthe gradients passed in does not equal to the size of the parameters!zParams length: z. zGradients length: r   r(   r)   r*   r+   r,   r   r   r   r   r   r-   )r!   r    r   zipr.   r   r   r/   r0   r1   r   r2   r3   r4   r   r   r   r   )r"   r<   r   r5   r6   r7   r8   r9   r:   r&   Zgradientr   r#   r#   r$   r(   {   s|    





 
 
 


z_FunctionalAdam.stepN)	r	   r
   r   r   FFFFF)__name__
__module____qualname__r   r   floatr   boolr%   r   r;   r(   r#   r#   r#   r$   r      s.            
,:r   )typingr   r   r   r   r   Ztorch.optim._functionalZoptimZ_functionalr3   r   r   r   __annotations__r   scriptr   r#   r#   r#   r$   <module>   s    