# Reconstructed readable source for torch/optim/asgd.py. Recovered from a
# compiled (.pyc) dump; names, docstrings, and constants follow that dump, but
# exact formatting and minor details of the upstream file may differ.
import torch
from torch import Tensor

from .optimizer import (Optimizer, _use_grad_for_differentiable, _get_value,
                        _default_to_fused_or_foreach, _differentiable_doc,
                        _foreach_doc, _maximize_doc)
from torch._utils import is_compiling
from typing import List, Optional

__all__ = ["ASGD", "asgd"]


def _to_tensor(x):
    if not isinstance(x, torch.Tensor):
        return torch.tensor(x)
    return x


class ASGD(Optimizer):
    def __init__(self, params, lr=1e-2, lambd=1e-4, alpha=0.75, t0=1e6,
                 weight_decay=0, foreach: Optional[bool] = None,
                 maximize: bool = False, differentiable: bool = False):
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")

        defaults = dict(lr=lr, lambd=lambd, alpha=alpha, t0=t0,
                        weight_decay=weight_decay, foreach=foreach,
                        maximize=maximize, differentiable=differentiable)
        super().__init__(params, defaults)

    def __setstate__(self, state):
        super().__setstate__(state)
        for group in self.param_groups:
            group.setdefault("foreach", None)
            group.setdefault("maximize", False)
            group.setdefault("differentiable", False)
        # Older checkpoints stored step/eta/mu as Python scalars; convert them
        # to tensors so the update functions can modify them in place.
        state_values = list(self.state.values())
        step_is_tensor = (len(state_values) != 0) and torch.is_tensor(state_values[0]["step"])
        if not step_is_tensor:
            for s in state_values:
                s["step"] = torch.tensor(float(s["step"]))
        eta_is_tensor = (len(state_values) != 0) and torch.is_tensor(state_values[0]["eta"])
        if not eta_is_tensor:
            for s in state_values:
                s["eta"] = torch.tensor(s["eta"])
        mu_is_tensor = (len(state_values) != 0) and torch.is_tensor(state_values[0]["mu"])
        if not mu_is_tensor:
            for s in state_values:
                s["mu"] = torch.tensor(float(s["mu"]))

    def _init_group(self, group, params_with_grad, grads, mus, axs, etas, state_steps):
        for p in group["params"]:
            if p.grad is not None:
                params_with_grad.append(p)
                if p.grad.is_sparse:
                    raise RuntimeError("ASGD does not support sparse gradients")
                grads.append(p.grad)

                state = self.state[p]
                # State initialization
                if len(state) == 0:
                    state["step"] = torch.tensor(0.0)
                    state["eta"] = torch.tensor(group["lr"])
                    state["mu"] = torch.tensor(1.0)
                    state["ax"] = torch.zeros_like(p, memory_format=torch.preserve_format)

                mus.append(state["mu"])
                axs.append(state["ax"])
                etas.append(state["eta"])
                state_steps.append(state["step"])

    @_use_grad_for_differentiable
    def step(self, closure=None):
        """Performs a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad, grads, mus, axs, etas, state_steps = [], [], [], [], [], []
            self._init_group(group, params_with_grad, grads, mus, axs, etas, state_steps)

            asgd(params_with_grad, grads, axs, mus, etas, state_steps,
                 lambd=group["lambd"], lr=group["lr"], t0=group["t0"],
                 alpha=group["alpha"], weight_decay=group["weight_decay"],
                 foreach=group["foreach"], maximize=group["maximize"],
                 differentiable=group["differentiable"])

        return loss


ASGD.__doc__ = fr"""Implements Averaged Stochastic Gradient Descent.

    It has been proposed in `Acceleration of stochastic approximation by
    averaging`_.

    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-2)
        lambd (float, optional): decay term (default: 1e-4)
        alpha (float, optional): power for eta update (default: 0.75)
        t0 (float, optional): point at which to start averaging (default: 1e6)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        {_foreach_doc}
        {_maximize_doc}
        {_differentiable_doc}

    .. _Acceleration of stochastic approximation by averaging:
        https://dl.acm.org/citation.cfm?id=131098

    """


def asgd(params: List[Tensor], grads: List[Tensor], axs: List[Tensor],
         mus: List[Tensor], etas: List[Tensor], state_steps: List[Tensor],
         foreach: Optional[bool] = None, maximize: bool = False,
         differentiable: bool = False, *, lambd: float, lr: float, t0: float,
         alpha: float, weight_decay: float):
    r"""Functional API that performs asgd algorithm computation.

    See :class:`~torch.optim.ASGD` for details.
    """
    if foreach is None:
        _, foreach = _default_to_fused_or_foreach(params, differentiable, use_fused=False)

    if foreach and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with foreach optimizers")

    if foreach and not torch.jit.is_scripting():
        func = _multi_tensor_asgd
    else:
        func = _single_tensor_asgd

    func(params, grads, axs, mus, etas, state_steps, lambd=lambd, lr=lr, t0=t0,
         alpha=alpha, weight_decay=weight_decay, maximize=maximize,
         differentiable=differentiable)


def _single_tensor_asgd(params: List[Tensor], grads: List[Tensor], axs: List[Tensor],
                        mus: List[Tensor], etas: List[Tensor], state_steps: List[Tensor],
                        *, lambd: float, lr: float, t0: float, alpha: float,
                        weight_decay: float, maximize: bool, differentiable: bool):
    for i, param in enumerate(params):
        grad = grads[i]
        grad = grad if not maximize else -grad
        mu = mus[i]
        ax = axs[i]
        eta = etas[i]
        step_t = state_steps[i]

        if torch.is_complex(param):
            grad = torch.view_as_real(grad)
            param = torch.view_as_real(param)
            ax = torch.view_as_real(ax)

        # update step
        step_t += 1
        step = _get_value(step_t)

        if weight_decay != 0:
            grad = grad.add(param, alpha=weight_decay)

        eta_value = _get_value(eta)
        # decay term
        param.mul_(1 - lambd * eta_value)

        # update parameter
        param.add_(grad, alpha=-eta_value)

        # averaging
        if is_compiling() or mu.item() != 1:
            ax.add_(param.sub(ax).mul(mu))
        else:
            ax.copy_(param)

        # update eta and mu
        new_eta = _to_tensor(lr / ((1 + lambd * lr * step) ** alpha))
        eta.copy_(new_eta)
        new_mu = _to_tensor(1 / max(1, step - t0))
        mu.copy_(new_mu)


def _multi_tensor_asgd(params: List[Tensor], grads: List[Tensor], axs: List[Tensor],
                       mus: List[Tensor], etas: List[Tensor], state_steps: List[Tensor],
                       *, lambd: float, lr: float, t0: float, alpha: float,
                       weight_decay: float, maximize: bool, differentiable: bool):
    if len(params) == 0:
        return

    assert not differentiable, "_foreach ops don't support autograd"

    grouped_tensors = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, axs, mus, etas, state_steps])
    for ((grouped_params, grouped_grads, grouped_axs, grouped_mus, grouped_etas,
          grouped_state_steps), _) in grouped_tensors.values():
        if maximize:
            grouped_grads = torch._foreach_neg(grouped_grads)

        def _view_complex_as_real(tensor_list):
            return [torch.view_as_real(t) if torch.is_complex(t) else t for t in tensor_list]

        grouped_grads = _view_complex_as_real(grouped_grads)
        grouped_params = _view_complex_as_real(grouped_params)
        grouped_axs = _view_complex_as_real(grouped_axs)

        # update step
        torch._foreach_add_(grouped_state_steps, 1)

        if weight_decay != 0:
            if maximize:
                # _foreach_neg above already produced fresh tensors, so the
                # weight-decay term can be added in place.
                torch._foreach_add_(grouped_grads, grouped_params, alpha=weight_decay)
            else:
                grouped_grads = torch._foreach_add(grouped_grads, grouped_params,
                                                   alpha=weight_decay)

        # decay term
        eta = _get_value(grouped_etas[0])
        torch._foreach_mul_(grouped_params, 1 - lambd * eta)

        # update parameter
        torch._foreach_add_(grouped_params, grouped_grads, alpha=-eta)

        # averaging
        for i in range(len(grouped_axs)):
            if is_compiling() or grouped_mus[i].item() != 1:
                grouped_axs[i].add_(grouped_params[i].sub(grouped_axs[i]).mul(grouped_mus[i]))
            else:
                grouped_axs[i].copy_(grouped_params[i])

        # update eta and mu
        for i in range(len(grouped_mus)):
            new_eta = _to_tensor(lr / ((1 + lambd * lr * _get_value(grouped_state_steps[i])) ** alpha))
            grouped_etas[i].copy_(new_eta)
            new_mu = _to_tensor(1 / max(1, _get_value(grouped_state_steps[i]) - t0))
            grouped_mus[i].copy_(new_mu)