import torch
from torch import Tensor
from .optimizer import (
    Optimizer,
    _default_to_fused_or_foreach,
    _use_grad_for_differentiable,
    _differentiable_doc,
    _foreach_doc,
    _maximize_doc,
)
from typing import List, Optional

__all__ = ["RMSprop", "rmsprop"]


class RMSprop(Optimizer):
    def __init__(
        self,
        params,
        lr=1e-2,
        alpha=0.99,
        eps=1e-8,
        weight_decay=0,
        momentum=0,
        centered=False,
        foreach: Optional[bool] = None,
        maximize: bool = False,
        differentiable: bool = False,
    ):
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")
        if not 0.0 <= momentum:
            raise ValueError(f"Invalid momentum value: {momentum}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")
        if not 0.0 <= alpha:
            raise ValueError(f"Invalid alpha value: {alpha}")

        defaults = dict(
            lr=lr,
            momentum=momentum,
            alpha=alpha,
            eps=eps,
            centered=centered,
            weight_decay=weight_decay,
            foreach=foreach,
            maximize=maximize,
            differentiable=differentiable,
        )
        super().__init__(params, defaults)

    def __setstate__(self, state):
        super().__setstate__(state)
        for group in self.param_groups:
            # Older checkpoints may predate these options; restore defaults.
            group.setdefault("momentum", 0)
            group.setdefault("centered", False)
            group.setdefault("foreach", None)
            group.setdefault("maximize", False)
            group.setdefault("differentiable", False)
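    # Note (illustrative, not from the upstream file): ``params`` may be a plain
    # iterable of tensors or a list of dicts defining per-parameter groups, each
    # overriding the constructor defaults, e.g. for a hypothetical ``model``:
    #
    #   RMSprop(
    #       [
    #           {"params": model.encoder.parameters()},
    #           {"params": model.head.parameters(), "lr": 1e-3, "momentum": 0.9},
    #       ],
    #       lr=1e-2,
    #       alpha=0.99,
    #   )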
zRMSprop.__setstate__c           	      C   s$  |d D ]}|j d krq|| |j jr4td||j  | j| }t|dkrd|d< tj|tjd|d< |d dkrtj|tjd|d< |d	 rtj|tjd|d
< ||d  |d dkr||d  |d	 r||d
  |d rt	|d t
rtd|d  d7  < qd S )Nr   z)RMSprop does not support sparse gradientsr   step)Zmemory_format
square_avgr   Zmomentum_bufferr   grad_avgr   z`step` can't be a tensorr   )gradappendZ	is_sparseRuntimeErrorr(   lentorchZ
zeros_likeZpreserve_format
isinstancer   )	r   r)   params_with_gradgradssquare_avgsmomentum_buffer_list	grad_avgspr(   r#   r#   r$   _init_group9   s@    


 
 
 
zRMSprop._init_groupc           	      C   s   d}|dk	r&t   | }W 5 Q R X | jD ]t}g }g }g }g }g }| |||||| t||||||d |d |d |d |d |d |d |d	 |d
 d q,|S )zPerforms a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )r1   Zenable_gradr&   r9   r   )	r   closureZlossr)   r3   r4   r5   r7   r6   r#   r#   r$   r*   _   s8    

zRMSprop.step)	r   r   r   r   r   FNFF)N)__name__

RMSprop.__doc__ = r"""Implements RMSprop algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \alpha \text{ (alpha)},\: \gamma \text{ (lr)},
                \: \theta_0 \text{ (params)}, \: f(\theta) \text{ (objective)}                   \\
            &\hspace{13mm}   \lambda \text{ (weight decay)},\: \mu \text{ (momentum)},\: centered\\
            &\textbf{initialize} : v_0 \leftarrow 0 \text{ (square average)}, \:
                \textbf{b}_0 \leftarrow 0 \text{ (buffer)}, \: g^{ave}_0 \leftarrow 0     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm}if \: \lambda \neq 0                                                    \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda  \theta_{t-1}                            \\
            &\hspace{5mm}v_t           \leftarrow   \alpha v_{t-1} + (1 - \alpha) g^2_t
                \hspace{8mm}                                                                     \\
            &\hspace{5mm} \tilde{v_t} \leftarrow v_t                                             \\
            &\hspace{5mm}if \: centered                                                          \\
            &\hspace{10mm} g^{ave}_t \leftarrow g^{ave}_{t-1} \alpha + (1-\alpha) g_t            \\
            &\hspace{10mm} \tilde{v_t} \leftarrow \tilde{v_t} -  \big(g^{ave}_{t} \big)^2        \\
            &\hspace{5mm}if \: \mu > 0                                                           \\
            &\hspace{10mm} \textbf{b}_t\leftarrow \mu \textbf{b}_{t-1} +
                g_t/ \big(\sqrt{\tilde{v_t}} +  \epsilon \big)                                   \\
            &\hspace{10mm} \theta_t \leftarrow \theta_{t-1} - \gamma \textbf{b}_t                \\
            &\hspace{5mm} else                                                                   \\
            &\hspace{10mm}\theta_t      \leftarrow   \theta_{t-1} -
                \gamma  g_t/ \big(\sqrt{\tilde{v_t}} + \epsilon \big)  \hspace{3mm}              \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to the
    `lecture notes <https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`_ by G. Hinton,
    and for the centered version to `Generating Sequences
    With Recurrent Neural Networks <https://arxiv.org/pdf/1308.0850v5.pdf>`_.
    The implementation here takes the square root of the gradient average before
    adding epsilon (note that TensorFlow interchanges these two operations). The effective
    learning rate is thus :math:`\gamma/(\sqrt{v} + \epsilon)` where :math:`\gamma`
    is the scheduled learning rate and :math:`v` is the weighted moving average
    of the squared gradient.
    """ + fr"""
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-2)
        momentum (float, optional): momentum factor (default: 0)
        alpha (float, optional): smoothing constant (default: 0.99)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        centered (bool, optional): if ``True``, compute the centered RMSProp;
            the gradient is normalized by an estimate of its variance
            (default: False)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        {_foreach_doc}
        {_maximize_doc}
        {_differentiable_doc}

    """


def rmsprop(
    params: List[Tensor],
    grads: List[Tensor],
    square_avgs: List[Tensor],
    grad_avgs: List[Tensor],
    momentum_buffer_list: List[Tensor],
    # kwonly args with defaults are not supported by functions compiled with
    # torchscript; kept as ordinary kwargs for now since the functional API is
    # compiled by torch/distributed/optim
    foreach: Optional[bool] = None,
    maximize: bool = False,
    differentiable: bool = False,
    *,
    lr: float,
    alpha: float,
    eps: float,
    weight_decay: float,
    momentum: float,
    centered: bool,
):
    r"""Functional API that performs rmsprop algorithm computation.
    See :class:`~torch.optim.RMSprop` for details.
    """
    if foreach is None:
        _, foreach = _default_to_fused_or_foreach(
            params, differentiable, use_fused=False
        )

    if foreach and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with foreach optimizers")

    if foreach and not torch.jit.is_scripting():
        func = _multi_tensor_rmsprop
    else:
        func = _single_tensor_rmsprop

    func(
        params,
        grads,
        square_avgs,
        grad_avgs,
        momentum_buffer_list,
        lr=lr,
        alpha=alpha,
        eps=eps,
        weight_decay=weight_decay,
        momentum=momentum,
        centered=centered,
        maximize=maximize,
        differentiable=differentiable,
    )


def _single_tensor_rmsprop(
    params: List[Tensor],
    grads: List[Tensor],
    square_avgs: List[Tensor],
    grad_avgs: List[Tensor],
    momentum_buffer_list: List[Tensor],
    *,
    lr: float,
    alpha: float,
    eps: float,
    weight_decay: float,
    momentum: float,
    centered: bool,
    maximize: bool,
    differentiable: bool,
):
    for i, param in enumerate(params):
        grad = grads[i]
        grad = grad if not maximize else -grad
        square_avg = square_avgs[i]

        if weight_decay != 0:
            grad = grad.add(param, alpha=weight_decay)

        is_complex_param = torch.is_complex(param)
        if is_complex_param:
            param = torch.view_as_real(param)
            grad = torch.view_as_real(grad)
            square_avg = torch.view_as_real(square_avg)

        # v_t = alpha * v_{t-1} + (1 - alpha) * g_t^2
        square_avg.mul_(alpha).addcmul_(grad, grad, value=1 - alpha)

        if centered:
            grad_avg = grad_avgs[i]
            if is_complex_param:
                grad_avg = torch.view_as_real(grad_avg)
            grad_avg.lerp_(grad, 1 - alpha)
            avg = square_avg.addcmul(grad_avg, grad_avg, value=-1).sqrt_()
        else:
            avg = square_avg.sqrt()

        if differentiable:
            avg = avg.add(eps)
        else:
            avg = avg.add_(eps)

        if momentum > 0:
            buf = momentum_buffer_list[i]
            if is_complex_param:
                buf = torch.view_as_real(buf)
            buf.mul_(momentum).addcdiv_(grad, avg)
            param.add_(buf, alpha=-lr)
        else:
            param.addcdiv_(grad, avg, value=-lr)
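
# Illustrative sketch (not part of the upstream module) of driving the
# functional entry point ``rmsprop`` above directly; normally the RMSprop class
# assembles these state lists. With ``momentum=0`` and ``centered=False`` the
# ``grad_avgs`` and ``momentum_buffer_list`` arguments may be empty. Defined
# for illustration only; never called at import time.
def _example_functional_rmsprop():
    param = torch.randn(3)
    grad = torch.randn(3)
    square_avg = torch.zeros(3)
    rmsprop(
        [param], [grad], [square_avg], [], [],
        lr=1e-2, alpha=0.99, eps=1e-8,
        weight_decay=0.0, momentum=0.0, centered=False,
    )
    return param  # updated in place by the call above
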
def _multi_tensor_rmsprop(
    params: List[Tensor],
    grads: List[Tensor],
    square_avgs: List[Tensor],
    grad_avgs: List[Tensor],
    momentum_buffer_list: List[Tensor],
    *,
    lr: float,
    alpha: float,
    eps: float,
    weight_decay: float,
    momentum: float,
    centered: bool,
    maximize: bool,
    differentiable: bool,
):
    if len(params) == 0:
        return

    assert not differentiable, "_foreach ops don't support autograd"

    grouped_tensors = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, square_avgs, grad_avgs, momentum_buffer_list]
    )
    for (
        (
            grouped_params,
            grouped_grads,
            grouped_square_avgs,
            grouped_grad_avgs,
            grouped_momentum_buffer_list,
        ),
        _,
    ) in grouped_tensors.values():
        if maximize:
            grouped_grads = torch._foreach_neg(grouped_grads)

        if weight_decay != 0:
            # Re-use the intermediate memory (grouped_grads) already allocated
            # for maximize
            if maximize:
                torch._foreach_add_(grouped_grads, grouped_params, alpha=weight_decay)
            else:
                grouped_grads = torch._foreach_add(
                    grouped_grads, grouped_params, alpha=weight_decay
                )

        def _view_complex_as_real(tensor_list):
            return [
                torch.view_as_real(t) if torch.is_complex(t) else t
                for t in tensor_list
            ]

        grouped_grads = _view_complex_as_real(grouped_grads)
        grouped_params = _view_complex_as_real(grouped_params)
        grouped_square_avgs = _view_complex_as_real(grouped_square_avgs)

        torch._foreach_mul_(grouped_square_avgs, alpha)
        torch._foreach_addcmul_(
            grouped_square_avgs, grouped_grads, grouped_grads, value=1 - alpha
        )

        if centered:
            grouped_grad_avgs = _view_complex_as_real(grouped_grad_avgs)
            torch._foreach_lerp_(grouped_grad_avgs, grouped_grads, 1 - alpha)
            avg = torch._foreach_addcmul(
                grouped_square_avgs, grouped_grad_avgs, grouped_grad_avgs, value=-1
            )
            torch._foreach_sqrt_(avg)
            torch._foreach_add_(avg, eps)
        else:
            avg = torch._foreach_sqrt(grouped_square_avgs)
            torch._foreach_add_(avg, eps)

        if momentum > 0:
            grouped_momentum_buffer_list = _view_complex_as_real(
                grouped_momentum_buffer_list
            )
            torch._foreach_mul_(grouped_momentum_buffer_list, momentum)
            torch._foreach_addcdiv_(grouped_momentum_buffer_list, grouped_grads, avg)
            torch._foreach_add_(
                grouped_params, grouped_momentum_buffer_list, alpha=-lr
            )
        else:
            torch._foreach_addcdiv_(grouped_params, grouped_grads, avg, value=-lr)
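
# Sketch of a consistency check between the single-tensor and foreach paths
# above (an illustrative assumption, not an upstream test; defined only, never
# called at import time). Both paths should produce the same update.
def _example_check_paths_agree():
    p1, p2 = torch.ones(4), torch.ones(4)
    grad = torch.full((4,), 0.5)
    s1, s2 = torch.zeros(4), torch.zeros(4)
    kwargs = dict(
        lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0.0,
        momentum=0.0, centered=False, maximize=False, differentiable=False,
    )
    _single_tensor_rmsprop([p1], [grad.clone()], [s1], [], [], **kwargs)
    _multi_tensor_rmsprop([p2], [grad.clone()], [s2], [], [], **kwargs)
    torch.testing.assert_close(p1, p2)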