U
    d                     @   sV   d dl mZ d dlmZ d dlmZ d dlmZ d dlZe	e
ZG dd deZdS )    )core)
caffe2_pb2)get_param_device)NetModifierNc                   @   s@   e Zd ZdZdZdZdZeegZeegZdd
dZ	dddZ
d	S )GradientClippingZl1_norml2_normZby_normZby_value皙?F   Nc
           
      C   s~   || j kstd||dk	r8|| jks8td||| _|| _t|| _|| _|| _	t|| _
t|| _|| _|	| _dS )a  
        Clips gradient to avoid gradient magnitude explosion or vanishing gradient.

        Args:
        grad_clip_method: ways to clip the gradients
        clip_norm_type: type of norm used in the necessary computation
        clip_threshold: threshold used to determine whether to clip
        use_parameter_norm: a boolean to indicate whether to incorporate
            the norm of the parameter
        compute_norm_ratio: a boolean to compute the ratio between gradient norm
            and parameter norm explicitly for debugging purpose
        clip_max: when clipping by_value, any value that is greater than
            clip_max will be clipped to clip_max
        clip_min: when clipping by_value, any value that is smaller than
            clip_min will be clipped to clip_min
        blobs_to_include: names of blobs whose gradient is to be clipped. If it is set
            to none, all param 's gradient in grad_map will be clipped.
        blobs_to_exclude: names of blobs whose gradient is not to be clipped.
        z6This method of clipping, {}, has not been implemented.N)GRAD_CLIP_METHODSAssertionErrorformatCLIP_GRADIENT_NORM_TYPESgrad_clip_methodclip_norm_typefloatclip_thresholduse_parameter_normcompute_norm_ratioclip_maxclip_minblobs_to_includeblobs_to_exclude)
selfr   r   r   r   r   r   r   r   r    r   L/tmp/pip-unpacked-wheel-ua33x9lu/caffe2/python/modeling/gradient_clipping.py__init__   s(    


zGradientClipping.__init__c              
   C   s  |d k	st ttj}i }| jd kr,|}n@| jD ]8}t|}	||	s^td	|	|
 ||	 ||	< q2| jd k	r| jD ]}||d  q|| D ]~\}	}
t|
tjrqt|	|t|	 ||d}t|> | j| jkr| j| jkrd}n| j| jkrd}|j|
g|jt|
d	| d|d}|dkrH|j|gdd	}|
|g}| jr|j|	g|jt|	d	| d|d}|dkr|j|gdd	}|| | jr|||g|jt|	d
 dg |j||
g| j d n(| j| j!kr|j"|
g|
g| j#| j$d W 5 Q R X qd S )Nz#param {0} is not defined in net {1})Zparam_to_deviceZdefault_device   r	   z	_l{}_norm)prefix)pg      ?)exponentZ_norm_ratio)	threshold)maxmin)%r   r   ZDeviceOptionr   CPUr   ZBlobReferenceZBlobIsDefined	Exceptionr   Namer   popitems
isinstanceZGradientSlicer   strZDeviceScoper   BY_NORMr   L2_NORML1_NORMZLpNormZNextScopedBlobZPowr   appendr   ZDivZClipTensorByScalingr   BY_VALUEZClipr   r   )r   netZinit_netZgrad_mapZblob_to_deviceZmodify_output_recordr$   Zfinal_param_mapZblobparamZgradZdevicer   Z	grad_normZ	op_inputsZ
param_normr   r   r   
modify_netE   s    



 






zGradientClipping.modify_net)r   r   FFr	   r
   NN)NNNF)__name__
__module____qualname__r-   r,   r+   r/   r   r   r   r2   r   r   r   r   r      s$               
*  r   )Zcaffe2.pythonr   Zcaffe2.protor   Zcaffe2.python.optimizerr   Z#caffe2.python.modeling.net_modifierr   logging	getLoggerr3   loggerr   r   r   r   r   <module>   s   
