U
    9%e                    @   s  U d dl Z d dlZd dlZd dlmZ d dl mZmZ d dlmZm	Z	 d dl
mZmZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlm  mZ d dlmZmZmZ d dl m!Z! d dlm"Z"m#Z#m$Z$m%Z% d d	l&m'Z'm(Z(m)Z)m*Z* d d
l+m,Z,m-Z- d dl.m/Z/m0Z0 ej1j2Z2g Z3ee4 e5d< ej6j7j8Z8G dd deZ9deej:e;dddZ<ee<ej:j=ddZ>ee<ej:j=dZ?ee<ej:j@dZAeeBedddZCe!e8jDe?eedddZDe!e8jEe?eedddZEe!e8jFe?eeeGeGddd ZFe!e8jHe?eeGeGeGe;ed!d"d#ZHe!e8jIjJgd$d% ZKe!e8jIjged&d'd(ZLe!e8jMe?eed)d*d+ZMe!e8jNe?eed,d-d.ZNe!e8jOeeeGeGd/d0d1ZOe!e8jPe?eed)d2d3ZPe!e8jQe?eeed4d5d6ZQe!e8jReeeGd7d8d9ZRe!e8jSe?eeeGe;d:d;d<ZSe!e8jTe?deee4d>d?d@ZTe!e8jUe?eedAdBdCZUe!e8jVe?eed)dDdEZVe!e8jWe?eeed4dFdGZWe!e8jXeeedHdIdJZXe!e8jYeeeeeef dKdLdMZYe!e8jZe8jZj[\e2j]e?deeeGeGe;eej^ edNdOdPZZe!e8j_e8j_j[\e2j]e?deeeGeGe;eej^ edNdQdRZ_e!e8j`e?eeeeGeGe;e;edSdTdUZ`e!e8jae?eeeedVdWdXZaeeBdYdZd[Zbejcd\d]d^Zde!e8jee?e9jfjgfeeeBed_d`daZee!e8jhe?eeeeBdbdcddZhe!e8jie?e9jfjgdefeeeBeGdfdgdhZie!e8jjj[e?eeeeBeGdidjdkZje!e8jjjke?eeeeBeGedldmdnZle!e8jmj[e?eeeeBeGdodpdqZme!e8jmjne?eeeeBeGedrdsdtZoeeeee eBeBeedudvdwZpe!e8jqe?eeeBedxdydzZqe!e8jreeeee eBeBeedud{d|Zre!e8jseeeee eBeBeedud}d~Zse!e8jte?de9jfjgfeeee eBedddZte!e8jue?de9jfjgfeeeee eBedddZue!e8jve* e?e9jfjgfeeeBedddZve!e8jwe?e9jfjgfeeeeBedddZwe!e8jxdeeeGdddZxe!e8jyeeedddZye!e8jzeeeB eBeBeBeBdddZze!e8j{jdeeBeeB eeB eBdddZ|e!e8j}eeeB eBeBdddZ}e!e8j~eeeB eBeBeBdddZ~eeejcdddZe!e8je>eeeBejcdddZe!e8je>eeeBejcdddZdd Ze!e8je* e?eeeB eeB eeB eeB edddZe!e8je* e?eeeB eeB eeB eeB eeB edddZe!e8jeeeGdddZe!e8jeeeB eBeBeBedddZe!e8jj[e?deeeeG edddZe!e8je8jj[\e2je8jj[\e2jeeGee; dddZe!e8jeeGee; dddZe!e8je* eeBe;dddZe!e8je* eeBe;dddZe!e8jjdeeeGedddĄZe!e8jjJdeeGeGedddƄZe!e8jdeeeBe;e;edȜddʄZe!e8jeeeBeBe;d˜dd̈́ZeeB dΜddЄZe!e8je8jgdeeeB eBee dќddӄZe!e8jje8jjgdeeBeBeedf d՜ddׄZe!e8je* e?deeeeBeBd؜ddڄZe!e8je* e?deeeeBeBe;dۜdd݄Ze!e8je* e?deeeeBeBdޜddZe!e8je?eeeeee eBeBeBeBee; eee ee ee f dddZee ee dddZe!e8jeeeeB eeee ee ee; eee ee ee f d	ddZeee ee ee ee e;eGeGe;eeeeee ee f d
ddZe!e8jeee ee ee ee e;eGeGeeeef d	ddZe8jj[\e2je8jj[\e2jeee ee ee ee e;eGeGeeeef d	ddZe8jj[\e2jdee dddZe!e8jj[eee ee eeeGeGeeeef dddZe!e8jj[eee ee eee;eGeGeeeef d	ddZe!e8jjeee ee e;eGeGeeeef dddZe!e8jj[eee ee eee;eGeGeeeeeef d	ddZe!e8je?ddd Zdd Zdd Ze!e8jdddddddeeejc eej e;e;eej dddZe!e8je8je8jgd	d
 Ze8jj[\e2je!e8jeeee ee ee e;eGeGdddZdd Ze!e8jeeee ee ee ee ee e;eGee; eeee ee f dddZe!e8jeeeee ee ee ee eGed	ddZe!e8je?eeeBeBf dddZe!e8jdde$eBe$e$e#dddZe!e8je* dde$eBe$e$e#dddZdde$eBe$e$e;e#dd d!Ze!e8je$eBe$e$d"d#d$Ze!e8je* e$eBe$e$d"d%d&Ze$eBe$e$e;d'd(d)Ze!e8je*d*d+e?eeeef d)d,d-Ze!e8jdeee;eBeGf ee;eBeGf d/d0d1Ze!e8jdd2d3Zd4d5 Zd6d7 Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jd8d9 Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jd:d; Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jd<d= Zʐd>d? Ze!e8jj[e8jj[\e2je?deeeB eeG ed@dAdBZe!e8jj[e8jj[\e2je?deeeB eeG eeG edCdDdEZe!e8jj[e8jj[\e2je?deeeB eeG eeG eeG edFdGdHZɐdIdJ Z̐dKdL Z͐dMdN ZΐdOdP ZϐddQdRZАdSdT ZѐdUdV ZҐddWdXZӐddYdZZԐd[d\ Ze!e8jj׃e8jjנ\e2je8jjנ\e2jd]d^ Ze!e8jj׃e8jjנ\e2je8jjנ\e2jd_d` Ze!e8jjۃe8jj۠\e2je8jj۠\e2jdadb Ze!e8jjۃe8jj۠\e2je8jj۠\e2jdcdd Zݐdedf ZސddgdhZߐddidjZdkdl Ze!e8jj׃e8jjנ\e2je8jjנ\e2jdmdn Ze!e8jjۃe8jj۠\e2je8jj۠\e2jdodp Zdqdr Zdsdt Ze!e8jjۃe8jj۠\e2je8jj۠\e2jdudv Ze!e8jj׃e8jjנ\e2je8jjנ\e2jdwdx Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jdydz Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jd{d| Ze!e8jj[e8jj[\e2je?deeeB e;eeG eeG ed}d~dZe!e8jj[eee;dddZe!e8je8jgdd Ze!e8jgdd Zeeee eBeBeeef dddZe!e8jeeee eBeBeeef dddZe!e8jeeee eBeBeeef dddZeeGedddZeeGedddZee%dddZe%eedddZee edddZeBe;ejcejdddZeeBeBe;dddZeeBeBeBe;dddZeeeB e;dddZeeeB e;dddZe!e8j e?eeeB e;dddZ e!e8je?deeeBeBe;edddZe!e8je* e?dd Ze!e8je* e?dd Ze!e8jdde9jfjgfddZejeje;dddZe8jj[\e2je* dd Ze!e8jj[e?deeeBeBf e;eeG eeG edddZe!e8jjŃe8jjŠ\e2je8jjŠ\e2je* e?deeeeBeBf  e;eeeGeGf  edddZ	e!e8j
e*ddddddÐdĄZ
e!e8je* ddd\dŐdƄZe!e8jj[e8jjnge* dejdddǜe#eejc ejeej e;dȜdɐdʄZe!e8jjgdejdddǜe#e#eejc ejeej e;d˜d̐d̈́Ze!e8je8jj[\e2je* ddde9jfjgfeee#e#ee eBedΜdϐdЄZe!e8je8jj[\e2je*d*dуeeeBeeef ddҐdӄZdԐdՄ Zee8je8j ee8je8j ee8je8j ee8je8j ee8je8jI ee8je8j ee8je8jP ee8je8j  ee8j!e8jM ee8j"e8j# ee8j$e8j% ee8j&e8j' ee8j(e8j) ee8j*e8j+ ee8j,e8j- ee8j.e8j/ ee8j0e8j1 ee8j2e8j3 ee8j4e8j5 ee8j6e8j7 ee8j8e8j9 ee8j:e8j; ee8j<e8j= ee8j>e8j? ee8j@e8jV dS (      N)Enum)partialreduce)chainproduct)CallablecastIterableListOptionalTupleUnion)	sym_floatsym_intTensorregister_decomposition)IntLike
NumberType
TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_outout_wrapper)expect_true	guard_int)tree_flattentree_map__all__c                   @   s   e Zd ZdZdZdZdS )	Reductionr         N)__name__
__module____qualname__NONEMEANSUM r)   r)   [/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/torch/_decomp/decompositions.pyr    "   s   r    F)ftype_promotioncompute_dtype_onlyc                    s   t  fdd}|S )Nc                     sr   dd t | |fd D }tj|di\  fdd}fdd}t|| t||}rd|S t||S d S )	Nc                 S   s   g | ]}t |tr|qS r)   )
isinstancer   .0xr)   r)   r*   
<listcomp>2   s    
 z-type_casts.<locals>.inner.<locals>.<listcomp>r   type_promotion_kindc                    s   t | tr|  S | S d S Nr.   r   tor1   computation_dtyper)   r*   increase_prec:   s    

z0type_casts.<locals>.inner.<locals>.increase_precc                    s   t | tr|  S | S d S r4   r5   r7   )result_dtyper)   r*   decrease_prec@   s    

z0type_casts.<locals>.inner.<locals>.decrease_prec)r   utilselementwise_dtypesr   )argskwargsZ	flat_argsr:   r<   rr-   r+   r,   )r9   r;   r*   inner0   s    
ztype_casts.<locals>.inner)	functoolswraps)r+   r,   r-   rC   r)   rB   r*   
type_casts+   s    rF   T)r,   r-   )r,   )r1   dimreturnc                 C   s$   t ||   D ]}| d} q| S )N)rangerG   	unsqueeze)r1   rG   _r)   r)   r*   _unsqueeze_to_dim]   s    rM   out_gradyc                 C   s   | d||     S Nr!   Zconj_physicalrN   r)   r)   r*   tanh_backwardc   s    rS   c                 C   s   | |d|     S rQ   rR   rN   r)   r)   r*   sigmoid_backwardi   s    rT   )rO   r1   beta	thresholdc                 C   s.   ||   }t|| |k| | | |d  S N      ?)exptorchwhere)rO   r1   rU   rV   zr)   r)   r*   softplus_backwardo   s    r]   )grad_outputalphascaleinput_scale	is_resultself_or_resultc           	      C   sf   || }|}|}|r6t |dk| | ||  | | S t |dk| | | t ||  | | S d S Nr   )rZ   r[   rY   )	r^   r_   r`   ra   rb   rc   ZnegcoefZposcoefZ
negiptcoefr)   r)   r*   elu_backwardv   s    
re   c                 C   s   t | |S r4   )rZ   Z	full_likeselfvaluer)   r)   r*   fill_scalar   s    ri   rh   c                    s(   t   dk fdd t|  S )Nr   c                      s   d    dS )Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrG   r)   rj   r)   r*   <lambda>       zfill_tensor.<locals>.<lambda>)rZ   _checkrG   atencopyrf   r)   rj   r*   fill_tensor   s
    

rq   )rg   rH   c                 C   s    t jt j| d ddddd S N   r   min   maxrZ   clamprg   r)   r)   r*   hardsigmoid   s    r|   r^   rg   c                 C   s   t |dk|dk @ | d dS )Ng      g      @gUUUUUU?        rZ   r[   r}   r)   r)   r*   hardsigmoid_backward   s
    r   r^   rg   Zmin_valmax_valc                 C   s   t ||k||kB d| S Nr~   r   r   r)   r)   r*   hardtanh_backward   s    r   c                 C   s$   | t jt j| d dddd d S rr   ry   r{   r)   r)   r*   	hardswish   s    r   )r^   rg   rH   c              
   C   s,   t |dk dt |dk| |d d  | S )Nr~   rs         ?r   r}   r)   r)   r*   hardswish_backward   s
    r   r^   rg   rV   c                 C   s   t ||kd| S r   r   r   r)   r)   r*   threshold_backward   s    r   r^   rg   negative_slopeself_is_resultc                 C   s   t |dk| | | S rd   r   r   r)   r)   r*   leaky_relu_backward   s    r   none)gradrg   approximatec                 C   s   d}d}d}|dkr|| d }d}|| }|| }	||||	   }
t |
}d| }d| }d| }d||  }|dd| |   }|| | }| ||  S |}|| d }ddt ||   }|t || d	  }| |||   S d S )
Ng;f?g;f?gmBP?tanhr   gHm?r!   rs   g      )rZ   r   erfrY   )r   rg   r   ZM_SQRT2Z	M_SQRT1_2Z
M_2_SQRTPIZkBetaZkKappaZx_sqZx_cuberC   Z
tanh_innerleftrightZleft_derivativeZtanh_derivativeZinner_derivativeZright_derivativeZkAlphaZcdfZpdfr)   r)   r*   gelu_backward   s,    
r   )r^   inputc                 C   s:   t t|}t |}|| d||   }| ||  S rQ   )rZ   r   FZsoftplussigmoid)r^   r   Zinput_tanh_softplusZinput_sigmoidoutr)   r)   r*   mish_backward   s    
r   c                 C   s   | t |  S r4   )rZ   r   r{   r)   r)   r*   silu   s    r   c                 C   s,   ddt |   }| | d|d|    S rQ   )rZ   rY   )r^   rg   r   r)   r)   r*   silu_backward  s    r   )rg   weightrH   c                 C   s   t | dk| ||  S rd   r   )rg   r   r)   r)   r*   _prelu_kernel	  s    r   )r^   rg   r   rH   c                 C   s4   t |dk| ||  }t |dkd||  }||fS )Nr   r~   r   )r^   rg   r   Z
input_gradZweight_gradr)   r)   r*   _prelu_kernel_backward  s    r   )rg   noiseloweruppertraining	generatorrH   c           
      C   sl   |d kst |rP| dk}t| ||}t|| | | }|t||d |S || d }	t| |	S d S Nr   r!   r"   )AssertionErrorro   uniformrZ   r[   copy_
leaky_relu)
rg   r   r   r   r   r   Znot_positiverA   outputr   r)   r)   r*   rrelu_with_noise  s    r   c              	   C   s   |  t| |||||S r4   )r   r   )rg   r   r   r   r   r   r)   r)   r*   rrelu_with_noise_0  s    r   )r^   rg   r   r   r   r   r   rH   c                 C   s:   |r|| dkr|  |S || d }t| |||S d S )Ngư>r"   )mulro   r   )r^   rg   r   r   r   r   r   r   r)   r)   r*   rrelu_with_noise_backward>  s    
   r   )r^   rg   bufferrH   c                 C   sN   |dk }t |dd}t |dd}t t | }| |||d|     S )Nr   r!   rI   )rZ   r[   rY   abs)r^   rg   r   Zin_negativeZ	max_derivsignr\   r)   r)   r*   log_sigmoid_backwardR  s
    r   loss	reductionc                 C   s4   |t jjkrt| S |t jjkr,t| S | S d S r4   )r    r'   rh   rZ   meanr(   sumr   r)   r)   r*   apply_loss_reduction^  s
    

r   dtypec                 C   s4   | t jkrt jS | t jkr t jS | t jkr0t jS d S r4   )rZ   Z	complex32Zfloat16Z	complex64Zfloat32Z
complex128Zfloat64r   r)   r)   r*   to_real_dtypeg  s    


r   )rg   targetr   rH   c                 C   s   | | d }t ||S )Nr"   )r   )rg   r   r   r   r)   r)   r*   mse_lossv  s    r   )r^   r   r   r   c                 C   s,   |t jjkrd|  nd}|||  |  S )N       @)r    r'   rh   numel)r^   r   r   r   normr)   r)   r*   mse_loss_backward  s    r   rX   )rg   r   r   rU   c                 C   s<   | |   }t||k d|d  | |d|  }t||S )Nr   r"   )r   rZ   r[   r   )rg   r   r   rU   r   r)   r)   r*   smooth_l1_loss  s    &r   )r^   rg   r   r   rU   c           	      C   sZ   |t jjkrd|  nd}|| }t|}||  }t||k || | |t| S rW   )r    r'   rh   r   rZ   r   r[   r   )	r^   rg   r   r   rU   r   r1   Zabs_xZ	norm_gradr)   r)   r*   smooth_l1_loss_backward  s    

r   )r^   rg   r   r   rU   
grad_inputc                 C   s*   t | ||||}t||j t||ddS NT)Z	copy_fromZcopy_toZexact_dtype)r   r   shaper   )r^   rg   r   r   rU   r   resultr)   r)   r*   smooth_l1_loss_backward_out  s    
r   )r^   rg   r   r   deltac              
   C   s`   |t jjkrd|  nd}|| }t|| k | |  | t||k||  | || |  S rW   )r    r'   rh   r   rZ   r[   )r^   rg   r   r   r   r   r1   r)   r)   r*   huber_loss_backward  s     r   )r^   rg   r   r   r   r   c                 C   s*   t | ||||}t||j t||ddS r   )r   r   r   r   )r^   rg   r   r   r   r   r   r)   r)   r*   huber_loss_backward_out  s    
r   )r^   rg   r   r   r   ignore_indextotal_weightrH   c                 C   s   |  dk rdnd}|tjjkr(| | } ||}t||k|d}t|}	t|	||d}	|	  |     krzdkrn n
| |} |d k	rdd t	|  D }
|j
d |
|< ||
}| | } t||k| d} |	|  S )Nr"   r   r!   g      c                 S   s   g | ]}d qS )r!   r)   r0   rL   r)   r)   r*   r2     s     z&_nll_loss_backward.<locals>.<listcomp>)rG   r    r'   rh   rK   rZ   r[   
zeros_likescatterrJ   r   reshape)r^   rg   r   r   r   r   r   channel_dimsafe_targetr   Z	new_shaper)   r)   r*   _nll_loss_backward  s     	

 

r   )r^   rg   rG   rH   c           
      C   s   |  dkstdt|  |}||}|d dksNtd| d| |d }||d|}||||}t|}d| | | |  }	||  }tj||	g|dS )Nr   z*glu does not support 0-dimensional tensorsr"   z.Halving dimension must be even, but dimension z	 is size rX   rk   )	rG   r   r=   canonicalize_dimsizenarrowrZ   r   cat)
r^   rg   rG   Zwrap_dimZnInZ	inputSizeZ	firstHalfZ
secondHalfZgradInputFirstHalfZgradInputSecondHalfr)   r)   r*   glu_backward  s    


r   c                 C   sx  d|    krdks"n td|  dks6td|  dkoL|  dk}|s|jd |jd kstd|j d|j d| dkstd	|j d
|  df|d ks| |jd kstd|tjjkr8|  dkr8|   dkr| jd |jd ksdtd|jd  d|    d| jd  n,|   dkrT|  dksdtd| j t| ||||||S )Nr   r"   input tensor should be 1D or 2Dr!   ;0D or 1D target tensor expected, multi-target not supportedsize mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)rI   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )rG   r   r   r   r    r&   rh   r   )r^   rg   r   r   r   r   r   no_batch_dimr)   r)   r*   nll_loss_backward  sP    
"
&$


      r   c                 C   s   |  dkstd|   |  dks<td|   |jd |jd krx|jd |jd krx|jd |jd kstd|j d	|j | dkstd
|j d|  dt| ||||||S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: rs   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r"   r!   r   r   r   z ( z, elements))rG   r   r   r   r   )r^   rg   r   r   r   r   r   r)   r)   r*   nll_loss2d_backward1  s8    

      r   )rg   r   r   r   rH   c              	   C   s\   |d t t |  | dd |t t | | dd  }|d k	rR|| }t||S )Nr!   r)   i)rZ   maximumlog1pnew_fulllogr   )rg   r   r   r   r   r)   r)   r*   binary_cross_entropyS  s    

 
r   )r^   rg   r   r   r   rH   c                 C   sR   d}| ||  t j|d|  |d }|d k	r6|| }|tjjkrN||  }|S )Ng-q=r!   rt   )rZ   rz   r    r'   rh   r   )r^   rg   r   r   r   ZEPSILONr   r)   r)   r*   binary_cross_entropy_backwardh  s    	"r   )r   r   r   rH   c                 C   s    t t |  | }t||S r4   )rZ   r   rY   r   )r   r   r   r   r)   r)   r*   soft_margin_lossz  s    r   )r^   rg   r   r   rH   c                 C   s6   ||  t || d  }|tjjkr2||  }|S rQ   )rZ   r   r    r'   rh   r   )r^   rg   r   r   r   r)   r)   r*   soft_margin_loss_backward  s    r   r"   r   otherpc                 C   s   t j| | |dS )N)r   )ro   r   r   r)   r)   r*   dist  s    r   )x1x2rH   c           	      C   s   |  ddd}tj|tjd}| ddd}tj|tjd}t| d||gd}t|||gd}||j}|	d
 S )Nr"   rI   Tmemory_formatr   )powr   rZ   	ones_likecontiguous_formatr   r   matmulmT	clamp_minsqrt)	r   r   Zx1_normZx1_padZx2_normZx2_padZx1_Zx2_r   r)   r)   r*   _euclidean_dist  s    r   )r^   input_sizesrG   startendstepc                 C   s   |  |}t|| ||||S r4   )	new_zerosrZ   Zslice_scatter)r^   r   rG   r   r   r  r   r)   r)   r*   slice_backward  s    	
r  r!   )rg   rG   r   r   r  c                 C   sB  |   }|dkrtdt|   |}t|  }t|  }|dkrPtd|d k	r\|nd}|d k	rl|ntj}	|dk r||| 7 }|	dk r|	|| 7 }	|dk rd}n||| kr|| }|	|k r|}	n|	|| kr|| }	| 	 |||   }
|	| }|| d | ||< ||  |9  < | j
r0tdn| |||
S d S )Nr   z,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver!   z<Slice decomposition for quantized tensors aren't implemented)rG   RuntimeErrorr=   r   listr   stridesysmaxsizestorage_offsetZis_quantizedNotImplementedErrorZ
as_strided)rg   rG   r   r   r  ndimsizesstridesZ	start_valZend_valr	  lenr)   r)   r*   slice_forward  s>    	r  )r^   r   rG   indexc                 C   s   |  |}t|| ||S r4   )r  rZ   Zselect_scatter)r^   r   rG   r  r   r)   r)   r*   select_backward  s    
r  )r^   r   offsetdim1dim2c                 C   s   |  |}t|| |||S r4   )r  rZ   Zdiagonal_scatter)r^   r   r  r  r  r   r)   r)   r*   diagonal_backward  s    
r  r^   r   input_dtypec                 C   s   | j |kr||}|S r4   )r   r6   r  r)   r)   r*   _cast_grad_to_input_dtype  s    

r  )r^   r   rG   r  c                 C   s0   | | }||t j||dd  }t| || S NTrG   keepdim)rZ   r   r  
contiguous)r^   r   rG   r  Znew_grad_outputr   r)   r)   r*   _softmax_backward_data  s      
r  c                 C   s*   | t |t j| |dd  }t| ||S r  )rZ   rY   r   r  )r^   r   rG   r  r   r)   r)   r*   _log_softmax_backward_data  s      
r  c           
      C   sZ   | |d  ||d   }t tjtj|d}|d||d}|d|| |d}	||	 S )z/Utility function to implement im2col and col2imr"   r!   r   devicer   rI   )r   rZ   arangeint64rK   )
Zinput_dZkernel_dZ
dilation_dZ	padding_dZstride_dr   Zblocks_dZ	arange_kwZblocks_d_indicesZkernel_gridr)   r)   r*    _im2col_col2im_indices_along_dim  s
    r#  )r   kernel_sizedilationpaddingr  rH   c              	      s(  t tdkdd  t t dkdd  t tdkdd  t tdkdd  ddd	}|d
 | d | ddd |d | jt}t |dkotdd dd  D fdd tdd tdd   D t tdd D  fdd |dk}|s@| d} | j\}}	}
}\}}\}} \}}\}}t|
||||| j	}t|||||| j	}t
| ||||f}|dd}|d d d d ||f }|dddddd}|d}|d}|||	| | || }|s$|d}|S ) Nr"   c                   S   s   dS )Nz"im2col(): only 2D kernel supportedr)   r)   r)   r)   r*   rl   4  rm   zim2col.<locals>.<lambda>c                   S   s   dS )Nz$im2col(): only 2D dilation supportedr)   r)   r)   r)   r*   rl   5  rm   c                   S   s   dS )Nz#im2col(): only 2D padding supportedr)   r)   r)   r)   r*   rl   6  rm   c                   S   s   dS )Nz"im2col(): only 2D stride supportedr)   r)   r)   r)   r*   rl   7  rm   Tc                 S   s<   |rt dd | D nt dd | D }t|dd  d S )Nc                 s   s   | ]}|d kV  qdS r   Nr)   r0   r   r)   r)   r*   	<genexpr>:  s     z1im2col.<locals>.check_positive.<locals>.<genexpr>c                 s   s   | ]}|d kV  qdS r'  r)   r(  r)   r)   r*   r)  :  s     c                   S   s   dS )Nz<{param_name} should be greater {'than' zero, but got {param}r)   r)   r)   r)   r*   rl   <  rm   z0im2col.<locals>.check_positive.<locals>.<lambda>allrZ   rn   param
param_namestrictcondr)   r)   r*   check_positive9  s
    ( zim2col.<locals>.check_positiver$  r%  r&  Fr/  r  rs   r   c                 s   s   | ]}|d kV  qdS r'  r)   r0   dr)   r)   r*   r)  G  s     zim2col.<locals>.<genexpr>r   c                      s   dt   S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler)   r   r)   r*   rl   H  rm   c                 s   s<   | ]4\}}}}}d |d|  ||d    d  |  V  qdS )r!   r"   Nr)   r0   r   padZdilZkerstr)   r)   r*   r)  K  s   r   c                 s   s   | ]}|d kV  qdS r'  r)   )r0   cr)   r)   r*   r)  R  s     c                      s6   dt dd   d d  d d d dS )	Nz!Given an input with spacial size r   , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.r6  r)   r%  r$  output_sizer&  r   r  r)   r*   rl   S  rm   r   r   rI   r!   rs      )T)rZ   rn   r  r   r+  r7  ziprK   r#  r   r   r:  permuter   r   squeeze)r   r$  r%  r&  r  r1  r  batched_inputZ	batch_dimr   Zinput_hZinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wZblocks_row_indicesZblocks_col_indicesZpadded_inputr   Znum_blocks_rowZnum_blocks_colr)   rA  r*   im2col*  s    




 

    
          

 
 
rP  )r   rB  r$  r%  r&  r  rH   c              
      s  t tdkdd  t tdkdd  t tdkdd  t tdkdd  t tdkdd  d$d	d
}|d |d |ddd |d |d | jt}t |dkotdd dd  D fdd d d  }t d | dkfdd dd tD }	|	d |	d   t d  k fdd t  dk fdd |dk}
|
s| d} | j\}}\}}\}}\}}\}}| d d | g |	 } | dddd dd!} t	|||||| j
}t|d }t	|||||| j
}d"d tD }| d d t g| }d d ||f}tj||| dd#}t|| | | | f}|
s|d}|S )%Nr"   c                   S   s   dS )Nzonly 2D output_size supportedr)   r)   r)   r)   r*   rl     rm   zcol2im.<locals>.<lambda>c                   S   s   dS )Nzonly 2D kernel supportedr)   r)   r)   r)   r*   rl     rm   c                   S   s   dS )Nzonly 2D dilation supportedr)   r)   r)   r)   r*   rl     rm   c                   S   s   dS )Nzonly 2D padding supportedr)   r)   r)   r)   r*   rl     rm   c                   S   s   dS )Nzonly 2D stride supportedr)   r)   r)   r)   r*   rl     rm   Tc                 S   s<   |rt dd | D nt dd | D }t|dd  d S )Nc                 s   s   | ]}|d kV  qdS r'  r)   r(  r)   r)   r*   r)    s     z1col2im.<locals>.check_positive.<locals>.<genexpr>c                 s   s   | ]}|d kV  qdS r'  r)   r(  r)   r)   r*   r)    s     c                   S   s   dS )Nz9{param_name} should be greater than zero, but got {param}r)   r)   r)   r)   r*   rl     rm   z0col2im.<locals>.check_positive.<locals>.<lambda>r*  r,  r)   r)   r*   r1    s
    ( zcol2im.<locals>.check_positiver$  r%  r&  Fr2  r  rB  )r"   rs   c                 s   s   | ]}|d kV  qdS r'  r)   r4  r)   r)   r*   r)    s     zcol2im.<locals>.<genexpr>r   c                      s   dt   S )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: r6  r)   r8  r)   r*   rl     rm   r   r!   c                      s   dd  d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = r   z and kernel_size=r)   r)   )r$  r   r)   r*   rl     rm   c                 S   s:   g | ]2\}}}}}d |d|  ||d    d  |  qS r!   r"   r)   r9  r)   r)   r*   r2     s   zcol2im.<locals>.<listcomp>rI   c                      s4   d d d d d d  dd  d	S 
NzGiven output_size=r=  r>  r?  r@  z , expected input.size(-1) to be 	 but got rI   .r)   r)   Lr%  r$  rB  r&  r   r  r)   r*   rl     rm   c                      s4   d d d d d d  dd  d	S rR  r)   r)   rU  r)   r*   rl     rm   rs   r   rC  c                 S   s   g | ]\}}|d |  qS )r"   r)   )r0   or   r)   r)   r*   r2     s     
accumulate)T)rZ   rn   r  r   r+  rD  rK   r   rE  r#  r   rM   r  prodro   _unsafe_index_putr   r:  rF  )r   rB  r$  r%  r&  r  r1  r  Zprod_kernel_sizecolrG  out_hout_wrH  rI  rJ  rK  rL  rM  rN  rO  Zindices_rowZindices_colZoutput_padded_sizer   idxr)   rU  r*   col2im}  s    




 
    

"     
     
r`  )r^   maskr`   c                 C   s$   | | | |  jt| d}|S )Nr   )type_ascloner=   suggest_memory_format)r^   ra  r`   rA   r)   r)   r*   native_dropout_backward  s    re  )r   
input_size	dimensionr   r  rH   c           	      C   s   t |dkrt| dS tt ||}tj|| | jtjd}|d||	 }| 
d|d 	||d } | |}d| |f }tj||| dd S )Nr   r   r   rI   r!   r4   TrX  )r  rZ   Zsqueeze_copyr=   r   r!  r   int32ZunfoldflattenZmovedimr  ro   r[  r  )	r   rf  rg  r   r  rG   r_  r   r  r)   r)   r*   unfold_backward  s    
rk  )r^   rg   epsrH   c              	   C   sx   |d k	r>|}d| }t t ||k||k| |d|   dS t t |dk|dk| |d|   |dtdS d S )NrX   r~   r)   nan)rZ   r[   logical_andr   float)r^   rg   rl  lohir)   r)   r*   logit_backward   s    rr  r   r   trainc                 C   s*   |r|dkrt | ||d S |  S d S rd   )ro   native_dropoutrc  rs  r)   r)   r*   dropout  s    rv  c                 C   st   |r\|dkr\|dkr.t | t j| t jdfS t | |k}||  tdd|   }||fS | t j| t jdfS d S )Nr   r!   r   rX   )rZ   r   bool	rand_likero  r   )r   r   rt  Z	bool_maskresr)   r)   r*   ru    s    ru  )r1   rG   half_to_floatc                 C   s   |   } |r| jtjksttj| tjjd\}}| 	|} | 
 dkrTt| }ntj| |dd}t| | }|tj||dd }|s|	|}|S Nr3   r   T)r  )r  r   rZ   halfr   r=   r>   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr6   r   rY   amaxr   )r1   rG   rz  r9   r;   Zunnormalizedx_maxr   r)   r)   r*   _softmax+  s      


r  c           	      C   s   |   } |r| jtjksttj| tjjd\}}| 	|} | 
 dkrN| }ntj| |dd}| | }ttjt||dd}|| }|s|	|}|S r{  )r  r   rZ   r}  r   r=   r>   r~  r  r6   r   r  r   r   rY   )	r1   rG   rz  r9   r;   Zshiftedr  Zshifted_logsumexpr   r)   r)   r*   _log_softmaxB  s"     


r  )rg   r   r_   rH   c                 C   s   t j|| |dS Nr_   rZ   subrg   r   r_   r)   r)   r*   rsub_TensorZ  s    r  c                 C   s   t j|| |dS r  r  r  r)   r)   r*   rsub_Scalar_  s    r  rI   )r   indicespadding_idxscale_grad_by_freqsparserH   c                 C   sN   |   dkstd|jdkrB| d|}|jdkr>|d}|S | | S d S )Nr"   z'weight' must be 2-Dr!   r   )rG   r   r  Zindex_selectrF  )r   r  r  r  r  r   r)   r)   r*   	embeddingd  s    


r  )r^   r  num_weightsr  r  c                 C   s   t j| t jjd\}}| |} t|tj}|rp||f}t	|}t
j||g|dd}|| }	| |	d } t||k| j}
| |
d}| |f| j|jd   }t
j||g|dd|S )Nr|  TrX  rI   r   )r=   r>   r~  r  r6   r   rZ   longr  r   ro   r[  rK   rM   r  masked_fillr   )r^   r  r  r  r  r9   r;   countsonesZgrad_weights_scalera  r   grad_weightr)   r)   r*   embedding_dense_backwardx  s(     


r  r7   c                 C   s   d}| D ]}||9 }q|S rQ   r)   )r1   rA   ir)   r)   r*   rZ    s    
rZ  )rg   split_sizesrG   rH   c                 C   s   t tt|| j| kdd  t|}g }d}t|D ]R}|| }t |dkdd  t|| | j| k |	| 
||| ||7 }q8|S )Nc                   S   s   dS )NzDSplit sizes don't add up to the tensor's size in the given dimensionr)   r)   r)   r)   r*   rl     rm   z"split_with_sizes.<locals>.<lambda>r   c                   S   s   dS )NzCsplit_with_sizes expects split_sizes have only non-negative entriesr)   r)   r)   r)   r*   rl     rm   )rZ   Z_check_with
ValueErrorr   r   r  rJ   rn   r   appendr   )rg   r  rG   Z
num_splitsZsplitsZ	start_idxr  lengthr)   r)   r*   split_with_sizes  s$    
r  .)rg   
split_sizerG   rH   c                    sx   | j }|| } dkr(|dks"t| fS |  d   }t|} fddt|D }  | |  |d< t| ||S )Nr   r!   c                    s   g | ]} qS r)   r)   r0   r  r  r)   r*   r2     s     zsplit.<locals>.<listcomp>rI   )r   r   r   rJ   rZ   split)rg   r  rG   r   dim_sizechunksr  r)   r  r*   r    s    r  )rg   mat1mat2rU   r_   c                 C   sH   |   s |  s t|}t|}|t|| }|dkr<|S |||   S rd   )is_floating_point
is_complexintrZ   mm)rg   r  r  rU   r_   r   r)   r)   r*   addmm  s    r  )rg   r  r  rU   r_   use_geluc                 C   s<   t | ||||}|r2| jr(tj|ddS t|S t|S )Nr   )r   )r  is_cudaro   gelurelu)rg   r  r  rU   r_   r  r   r)   r)   r*   _addmm_activation  s    
r  )rg   r  vecrU   r_   c                 C   sH   |   s |  s t|}t|}|t|| }|dkr<|S |||   S rd   )r  r  r  rZ   mv)rg   r  r  rU   r_   r   r)   r)   r*   addmv  s    r  )r^   r   r   rstdgammaNCHxWgroupoutput_maskrH   c
              	      s  t j| ||dd t j|| dd t j|dd t|    k fdd tjfkfdd td kp  k fdd t \}
}t|dk fdd t| |	 j
d	gd
}| 	 j
d	gd
}d }d }d }|	d rvd|
  }d k	rt|d|

d	}t|d|

d	}t|dd|
}nL||

d	}||

d	}t|dtjd|
f|jd}| | | | | | }|  || |  }|d}t|d}t|d}t| |
|t||
| | }||j|j}|	d r|	|
|	|
d  |d j
dgd
 }|	d	 r|j
dgd
}|||fS )NF)Zallow_cpu_scalar_tensorsc                      s   d    dS )NzExpect input to have z	 elementsr)   r)   )r  r  r  r)   r*   rl     rm   z,native_group_norm_backward.<locals>.<lambda>c                      s   d  d dj  S )NzExpect mean to have shape (, z
, but got r8  r)   )r  r  r   r)   r*   rl     rm   c                      s    d  dd k	r  nd S )NzExpect gamma to have z elements but got rI   r   r)   )r  r  r)   r*   rl     rm   r   c                      s   d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r)   r)   )r  r  r)   r*   rl     rm   r"   rk   rX   rI   r!   r   r   )r=   Zcheck_same_deviceZcheck_same_shaperZ   rn   r   r   divmodr   viewr   rK   r   r  r   rM   r6   r   )r^   r   r   r  r  r  r  r  r  r  ZcpgZ_remZdsdbd_inputZd_gammad_biassZds_valZdb_valc1c2c3r)   )r  r  r  r  r  r   r*   native_group_norm_backward  s         

""



$
r  )r1   rH   c                 C   s   | d k	r|  |S | S r4   r6   )r1   r   r)   r)   r*   _maybe_castS  s    
r  )	grad_outr   normalized_shaper   r  r   biasr  rH   c           !         s<  |j }| }	t|j  fdd| |||fD \}
}}}|
d k	sHt|	t| }||d  }|d | }g }g }t|	D ]"}||kr|| q||| q|t	|}t	|}|dks|dkr|d r|
|nd |d r|
||d  nd |d r|
||d  nd fS || | }|d k	r4|
| }n|
}|| }t||d}t||}t||d}t||}|| | }d }d }d } |d r|| | }|d r|d k	rt|dkrt|
| |d}n|
| }|d r|d k	rt|dkrt|
|d} n|
 } t||jt||jt| |jfS )Nc                 3   s(   | ] }|d k	r|   n|V  qd S r4   )r6   r  r/   r8   r)   r*   r)  h  s   z-native_layer_norm_backward.<locals>.<genexpr>r   r!   r"   TF)r   rG   r=   get_computation_dtyper   r   r  rJ   r  rZ  r  rZ   r   r   rc  r  )!r  r   r  r   r  r   r  r  input_shapeZ
input_ndimgrad_out_cast
input_castweight_castZ	bias_castaxisZ
inner_dimsZ
outer_dimsZinner_dim_indicesZouter_dim_indicesr  r  MZx_hatZ
grad_x_hatabr  r  r  rC   r  Zd_weightr  r)   r8   r*   native_layer_norm_backwardZ  sd    







r  )
r   r   r  running_meanrunning_varr   momentumrl  
functionalrH   c	                 C   sb  dgt td|   }	t| j}
|}|}|rt| j}
| j|
d}tj||	ddd\}}t	|| }| | | }t
||	}t
||	}|d k	r|| d| |  }|s|| |d k	r|  | jd  }t
||	}|||d   }|| d| |  }|s|| n|d k	r.|d k	s2t|j|
dd}|}|j|
dd}|}|}dt||  }| jjdkr|}|}n| d	}| d	}t||  d }t||  d }| | | }|d k	r| }t||  d }|| }|d k	r | }t||  d }|| }| jjdkrJ|j| jd}|j| jd}|j| jd||||fS )
Nr   r"   r   T)rG   Z
correctionr  r!   )r   rp   cpur   )r  rJ   rG   r=   r  r   r6   rZ   Zvar_meanrsqrtrF  r   r   r   r   r   r   typer  rM   rj  )r   r   r  r  r  r   r  rl  r  Zreduction_dimsr9   new_running_meannew_running_varZ	input_accZ
biased_varr   r  r   	save_mean	save_rstdnZsqueezed_varZunbiased_varinvstdr)   r)   r*   native_batch_norm_helper  sx       






r  )	r   r   r  r  r  r   r  rl  rH   c              
   C   s,   t | |||||||d	\}}	}
}}||	|
fS NFr  r   r   r  r  r  r   r  rl  r   r  r  rL   r)   r)   r*   native_batch_norm  s            r  c              
   C   sz   |d kr$|d kr$t | |||||S |d kr4td|d krDtd|r`t | |||||||S t | ||||||S d S )Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)ro   _native_batch_norm_legitr  $_native_batch_norm_legit_no_training)r   r   r  r  r  r   r  rl  r)   r)   r*   native_batch_norm_decomposition  sJ                      r  )rH   c                    s|   |  |}|| d |   dkrh|dkrh fdd|D }  | |  ||d < tjjj| ||S tjjj|  |S )Nr!   r   c                    s   g | ]} qS r)   r)   r   r  r)   r*   r2   ?  s     z(unsafe_chunk_py_impl.<locals>.<listcomp>)r   rZ   opsro   unsafe_split_with_sizesdefaultunsafe_splitr   )tensorr  rG   r  r  r)   r  r*   unsafe_chunk_py_impl9  s    
r  )r   r   r  r  r  r  rl  rH   c              
   C   s   t j| ||||d||S r  )ro   r  r  )r   r   r  r  r  r  rl  r)   r)   r*   r  E  s    
r  c              
   C   s,   t | |||||||d	\}}	}
}}||	|
fS r  r  r  r)   r)   r*   r  [  s            r  )r   r   r  r   r  rl  rH   c           
   
   C   s,   t | ||d d |||d	\}}}}	}	|||fS r  r  )
r   r   r  r   r  rl  r   r  r  rL   r)   r)   r*   !_native_batch_norm_legit_no_statsl  s    	        r  c              
   C   sP   t | |||||||d	\}}	}
}}|d k	s2td|d k	sBtd||	|
||fS )NTz#new_running_mean should not be Nonez"new_running_var should not be None)r  r   )r   r   r  r  r  r   r  rl  r   r  r  r  r  r)   r)   r*   #_native_batch_norm_legit_functional{  s(            r  c                 C   sB   |d kst t| |k jtjd}|| |  d|  }||fS )Nr   rX   )r   rZ   rx  r6   uint8rb  )r   r   r   ra  ry  r)   r)   r*   _fused_dropout_decomposition  s    r  c                 C   s   t | tjjr| jS d S d S r4   )r.   rZ   Z_subclassesZ
FakeTensorZfake_device)r  r)   r)   r*   device_hint  s    r  c                 C   sD   |d k	r@| j jdkr@ddlm} | }d|_|j}||| |S | S )Nmetar   )FakeTensorModeT)r   r  Ztorch._subclasses.fake_tensorr  Zin_kernel_invocationZfake_tensor_converterZfrom_meta_and_device)r1   common_devicer  Z	fake_mode	converterr)   r)   r*   wrap_output_with_input_device_  s    r  )r   layoutr   
pin_memorynon_blockingr   )r1   r   r   r  r  r   c          	      C   s   |r|t jkstd|r"td|d krB|d krB|d krB|  S d}t| }|d k	r|| jkr|d k	r|jdkrt j| |} d}t j	| |} |d k	r|st j| |} d}|rt
| |} |d k	rt j| |dS | S )NTODOFr  Tr   )rZ   stridedr   rc  r  r   r  _primsZconvert_element_typeZ
device_putr  )	r1   r   r  r   r  r  r   Zdtype_convertedr  r)   r)   r*   _to_copy  s&    
r  c                 C   s
   t | S r4   )ro   aliasr7   r)   r)   r*   nop_decomposition  s    r  )r   r   r  r  r  r   exponential_average_factorepsilonc              
   C   s^   t | |||||||\}}	}
|r:||	|
| jdtjdfS ||d|d| jdtjdfS )Nr  r   )ro   r  r  rZ   r  )r   r   r  r  r  r   r  r  r  r  r<  r)   r)   r*   cudnn_batch_norm  s"    
r  c                 C   sD   t |D ]6\}}|dkr|| jk r4| j| || ks| |} q| S rQ   )	enumerater  r   rK   )r1   broadcast_maskr  ra  r)   r)   r*   _broadcast_batch_norm_backward  s    $r	  )r  r   r   r  r  r  save_invstdrt  rl  r  rH   c
           &         s  |j }
|d k	r|j }n|
}t|j   fdd| ||||||fD \}}}}}}}|j}| }|dksrtdd}tt|||  }|}|}|r|d k	r|d k	stn&|d k	r|d k	st|}t	|| }dg| }|| ||< g }t
|D ]}||kr|| qt||}d| }t||}t|||  |}t|| |}tt|| || |} |d krt||d }!nt|| |}!|r|| |  }"||" | |! }#n||! }#|	d r|| }$nd }$|	d r|}%nd }%|#|
t|$|t|%|fS )Nc                 3   s$   | ]}|d k	r|  n|V  qd S r4   r  r/   r8   r)   r*   r)  !  s   z-native_batch_norm_backward.<locals>.<genexpr>r"   z$rank of the input must be at least 2r!   rX   )r   r=   r  r   rG   r   rZ  r  rZ   r  rJ   r  r	  r   r   r6   r  )&r  r   r   r  r  r  r
  rt  rl  r  r  Zweight_dtyper  r  r  Zrunning_mean_castZrunning_var_castZsave_mean_castZsave_invstd_castr  Z
input_rankr  Znum_featuresr   r  r  Zreduction_axesr  r   Zgrad_output_sumZdot_pZ	grad_meanZ
proj_scaleZ
grad_scaleZprojr   r  Z	grad_biasr)   r8   r*   native_batch_norm_backward  s    	



 


r  	r   r^   r   r  r  r  Zsave_varr  ZreserveSpacec	           	      C   s"   t || |||||d|dddg
S )NT)ro   r  r  r)   r)   r*   cudnn_batch_norm_backwardj  s    r  )r   rB  c                    s.  | j  | jttdkfdd | jdd  D ]}t|dkfdd q:| jtjtjtjtj	tj
fkrtjj| |S d |d  dkrd |d  dkrtdd	 tdd  |D }td
d	 tdd  ||D }tjj| ||S dd dd  fdd}|d |d \}}}}	|d |d \}
}}}| dt|d|
f }|	s|stj|ddS dd }|||||	dd\}}|||||dd\}}d }tt|jd t|jd D ]B\}}|d kr|d|d d |f }n||d|d d |f  }q|||  S )Nr3  c                      s
   d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r)   r)   r  r)   r*   rl     rm   z%adaptive_avg_pool2d.<locals>.<lambda>r   r   c                      s   dt   dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape rT  r6  r)   r8  r)   r*   rl     rm   rI   c                 s   s   | ]\}}|| V  qd S r4   r)   )r0   r  rW  r)   r)   r*   r)    s     z&adaptive_avg_pool2d.<locals>.<genexpr>c                 s   s$   | ]\}}}||d  |  V  qdS )r!   Nr)   )r0   r  rW  r  r)   r)   r*   r)    s    c                 S   s   t j| | |ddS )NtruncZrounding_moderZ   divr  r  r<  r)   r)   r*   start_index  s    z(adaptive_avg_pool2d.<locals>.start_indexc                 S   s    t j| d | | d |ddS )Nr!   r  r  r  r  r)   r)   r*   	end_index  s    z&adaptive_avg_pool2d.<locals>.end_indexc                    s   t j| t jd}||| }| | d }| | }|dkpD|| dk }|rV|d7 }n|dkrf|d8 }t j| t jd}|d| }|rt j| d |j|jd}	t ||	}||| }
|
| }n|}||||fS )Nrh  r!   r   rI   r  )rZ   r!  r"  rK   Zscalar_tensorr   r   minimum)in_sizeout_sizeZorangeZi0Z	maxlengthZin_size_modadaptive	range_maxr_  maxvali1r  )r   r  r  r)   r*   compute_idx  s,    
  
z(adaptive_avg_pool2d.<locals>.compute_idx.r   )r   rI   rk   c                 S   sd   t |tr| |fS |dk st||dk}|dkr>t|d}t| |d} t|| }| |fS d S )Nr   rI   r   r   r~   )r.   r   r   rK   rM   rZ   r  )valsr  r  r  rG   ra  r)   r)   r*   
maybe_mask  s    

z'adaptive_avg_pool2d.<locals>.maybe_mask)r  rG   r   )r   r   r  rZ   rn   r   int8r  Zint16ri  r"  nnr  adaptive_avg_pool2dr7  rD  Z
avg_pool2drM   r   r   rJ   )r   rB  r5  r  Zkernelr  ZidxhZlength_hZrange_max_hZ
adaptive_hZidxwZlength_wZrange_max_wZ
adaptive_wr  r  retr  jr)   )r   r  r  r   r  r*   r"    sb    

,      
    
&
r"  r  r1   rG   r  r  r_   c                C   s   t | |||d|dS )NTinplacer_   
_index_addr%  r)   r)   r*   
index_add_  s    	r*  c                C   s   t | |||d|dS )NFr&  r(  r%  r)   r)   r*   	index_add  s    
r+  )r1   rG   r  r  r'  r_   c                   s   t | j|}tjdkfdd  dkrnt | jttkpVt t	  fdd |  }| jdk}|r| 
dn| }d| f }|rtjntj}	|	|||dd}
|r| S |r|
dS |
 S d S )	Nr!   c                      s   d j  dS Nz(Index should have dimension 1 or 0 (got r   r  r)   r  r)   r*   rl     rm   z_index_add.<locals>.<lambda>c                      s   dt   d dS )Nzalpha argument of type z cannot be safely cast to type !)r  r)   )r_   python_typer)   r*   rl     rm   r   r4   TrX  )r=   canonicalize_dimsr  rZ   rn   Zdtype_to_typer   rw  Zis_weakly_lesser_typer  rK   ro   
index_put_	index_putrF  r  )r1   rG   r  r  r'  r_   zero_dimr   r_  r2  r   r)   )r_   r  r/  r*   r)  	  s*    	

r)  r1   rG   r  r  c                 C   s   t | |||ddS )NTr'  _index_copyr4  r)   r)   r*   index_copy_+  s    r8  c                 C   s   t | |||ddS )NFr5  r6  r4  r)   r)   r*   
index_copy0  s    r9  )r1   rG   r  r  r'  c          
         s   t | j|}t jdk fdd | jdk}|r@| dn| }d|  f }|r\tjntj}||||}	|rv| S |r|		dS |	
 S d S )Nr!   c                      s   d j  dS r,  r  r)   r-  r)   r*   rl   <  rm   z_index_copy.<locals>.<lambda>r   r4   )r=   r0  r  rZ   rn   rK   ro   r1  r2  rF  r  )
r1   rG   r  r  r'  r3  r   r_  r2  r   r)   r-  r*   r7  6  s    

r7  r   r   c                 C   sL   t | d| }t t |  }| jr6| d}n|}|t | |fS )Nr)   r  )rZ   r  r  rY   r   r  r   )rg   ru   r\   r   r)   r)   r*   log_sigmoid_forwardK  s    r:  r~   r1   lowhighc                 C   s"   t j| jt|t|| j| jdS )N)r<  r=  r   r   )primsZ_uniform_helperr   r   r   r   r;  r)   r)   r*   r   X  s    r   c                 C   s   |d kst | t| ||S r4   )r   r   r   )rg   r<  r=  r   r)   r)   r*   uniform_g  s    r?  c                 C   s   t | d }|d k	rDt|d kdd  tt ||kdd  |S |d k	rt|d kdd  tt ||kdd  g }t|D ]J\}}t||kr|| |d  t|  q|t| |d  |  q|S tddd  d S )	Nr"   c                   S   s   dS Nz9Must specify exactly one of output_size and scale_factorsr)   r)   r)   r)   r*   rl   s  rm   z.upsample_compute_output_size.<locals>.<lambda>c                   S   s   dS N r)   r)   r)   r)   r*   rl   u  rm   c                   S   s   dS r@  r)   r)   r)   r)   r*   rl   {  rm   c                   S   s   dS rA  r)   r)   r)   r)   r*   rl   }  rm   Fc                   S   s   dS r@  r)   r)   r)   r)   r*   rl     rm   )r  rZ   rn   r  r  r  r   )rf  rB  scale_factorsZspatial_dimensionsr  r  r)   r)   r*   upsample_compute_output_sizen  s0     rD  c                 C   s   | d krd S | | S r4   r)   )scalesr_  r)   r)   r*   get_scale_value  s    rF  c                 C   s&   t |  ||}t|d}t| ||S rd   )rD  r   rF  upsample_nearest1d)r   rB  rC  osizer`   r)   r)   r*   upsample_nearest1d_vec  s    
rI  c                 C   s2   t |  ||}t|d}t|d}t| |||S Nr   r!   )rD  r   rF  upsample_nearest2d)r   rB  rC  rH  scale_hscale_wr)   r)   r*   upsample_nearest2d_vec  s    

rN  c                 C   s>   t |  ||}t|d}t|d}t|d}t| ||||S r   )rD  r   rF  upsample_nearest3d)r   rB  rC  rH  Zscale_drL  rM  r)   r)   r*   upsample_nearest3d_vec  s
    


rP  c                 C   s   g }t |}| jtjkrtjn| j}t|D ]}|| }tj||| jd}| j| |  }	|| d k	rv|	|	||   n|	| }
||
 	tj
}t|d | D ]}|d}q|| q,t|S )Nr  r!   rI   )r  r   rZ   r  ro  rJ   r!  r   r   r6   r"  rK   r  r7  )r   rB  rE  r  Znum_spatial_dimsr  r5  rH  Zoutput_indicesisizer`   Zinput_indicesrL   r)   r)   r*   !_compute_upsample_nearest_indices  s    $rR  )r   rB  rE  rH   c                 C   s"   t | ||f\}t| d d |fS r4   rR  ro   _unsafe_index)r   rB  rE  Z	l_indicesr)   r)   r*   rG    s    rG  )r   rB  scales_hscales_wrH   c           
      C   sj   t | |||f\}}t| d d ||f}t| }| j\}}	}}| jjdkrZ|	dk rZtj	}|j
|d}|S )Ncudar   r   )rR  ro   rT  r=   rd  r   r   r  rZ   r   r  )
r   rB  rU  rV  	h_indices	w_indicesr   r   rL   
n_channelsr)   r)   r*   rK    s    	  
rK  )r   rB  scales_drU  rV  rH   c           	      C   s2   t | ||||f\}}}t| d d |||f}|S r4   rS  )	r   rB  r[  rU  rV  Z	d_indicesrX  rY  r   r)   r)   r*   rO    s    
  
rO  c                    sb   |r|rd n|rd n|r"d nd t   dksBtt  fddtdt  D S )NrC  r   rs   r"   r   c                    s    g | ]}t ||   qS r)   r6  r  Z
group_sizeparamsr)   r*   r2   	  s    z!gather_params.<locals>.<listcomp>)r  r   rJ   )r]  
has_biaseshas_projectionsr)   r\  r*   gather_params	  s    r`  c                 C   sh   |rB| d|  |d|   }}| d| d  |d| d   }}n| | ||  }}d\}}||||fS )Nr"   r!   NNr)   )r]  hiddensr  bidirectional
cur_params
cur_hiddenbidir_paramsbidir_hiddenr)   r)   r*   params_hiddens	  s    $rh  c                 C   s2   ||kst || d|||  | dd|S rd   )r   r  r   )re  last_batch_size
batch_sizerb  r)   r)   r*   update_hidden_for_packed	  s    rk  c              	   C   s4   ||kr| S ||k st t| |d||| fS rd   )r   rZ   concatr   )re  ri  rj  Z
inp_hiddenr)   r)   r*    update_hidden_for_packed_reverse"	  s    rm  c                 C   s&  |d }|d }|r|d nd }	|r,|d nd }
g }g }|rD|d n|d }| dd|}t| t|}|r||d d d }|D ]Z} | jd }||krn"|rt||||}nt||||}|| |||	||
}|}|| q|r|  n|| |  t	|d}|st	|dn|}||fS )Nr   r!   r"   rs   rI   )
r   rZ   r  r  r   rm  rk  r  reverser   )inphiddenr]  r^  	hidden_fnbatch_sizesrn  	ih_weight	hh_weightih_biashh_biasstep_outputrb  ri  re  	split_inpr  r   
hidden_outr)   r)   r*   one_layer_rnn_data0	  sL    
      

rz  c                    s    fdd}|S )Nc                    s    t ||||  S r4   r   linearr  re  rs  ru  rt  rv  nonlinearityr)   r*   rC   _	  s    zrnn_cell.<locals>.innerr)   r  rC   r)   r~  r*   rnn_cell^	  s    r  c                    s    fdd}|S )Nc                    s$   t | ||}  t ||||  S r4   r{  r}  r~  r)   r*   rC   f	  s    zrnn_cell_data.<locals>.innerr)   r  r)   r~  r*   rnn_cell_datae	  s    r  c                 C   s   |d }|d }|r|d nd }|r,|d nd }	t | ||}
|rL|
dn|
}
|d}g }|
D ] }|||||||	}|| qb|r|  t|d}||dfS )Nr   r!   r"   rs   )	r   r|  fliprK   r  rn  rZ   r   rF  )ro  rp  r]  r^  rq  rn  rs  rt  ru  rv  precomputed_inputre  rw  r  r   r)   r)   r*   one_layer_rnnm	  s    
r  c                 C   s   |d }|d }|r&|d }|d }nt | }t | }|d d}	|d d}
g }d}|	d}d}d}d}d}|  } |	 }	|
 }
t jjj| |||||	|
|||||||||}|d |d |d   }}}||	d|	dffS )Nr   r!   r"   rs   F)
rZ   Zzerosr   rK   r  r  ro   Zmkldnn_rnn_layerr  rF  )ro  rp  r]  r^  rn  Zw0Zw1Zw2Zw3hxcxrr  modeZhidden_size
num_layersrc  batch_firstrt  outputsrP   hycyr)   r)   r*   mkldnn_one_layer_lstm	  sN    


r  c
                 C   s   |r|  ddn| } g }
t|D ]}t||||\}}}}|rN||d k rN|nd}|	| |||\}}|
| |r|	| |||dd\}}|
| |rt||g| d } n|} |dkr |r ||d k r tj| |dd} q |r|  ddn| } | |
fS )Nr   r!   r~   T)rn  )rt  )	transposerJ   rh  r  rZ   r   rG   rv  )r   rp  r]  r^  r  rv  rt  rc  r  layer_fnfinal_hiddensr  rd  re  rf  rg  Zfwd_inpZ
fwd_hiddenZbwd_inpZ
bwd_hiddenr)   r)   r*   _rnn_helper	  s8       
    

r  c	                 C   sR   | d}	t||d}t| |	|||||||ttttjd
\}
}|
t|dfS Nr   Frq  )	unbindr`  r  r   r  r  rZ   r   stackr   r  r]  r^  r  rv  rt  rc  r  rp  r   r  r)   r)   r*   rnn_tanh_input	  s    
r  c	                 C   sR   | d}	t||d}t| |	|||||||ttttjd
\}
}|
t|dfS r  )	r  r`  r  r   r  r  rZ   r  r  r  r)   r)   r*   rnn_relu_input	  s    
r  c	                 C   sT   | d}	t||d}t| |	||||||dtt|ttjd
\}
}|
t|dfS Nr   Frr  rq  )	r  r`  r  r   rz  r  rZ   r  r  datarr  r  r]  r^  r  rv  rt  rc  rp  r   r  r)   r)   r*   rnn_relu_data
  s&    
r  c	                 C   sT   | d}	t||d}t| |	||||||dtt|ttjd
\}
}|
t|dfS r  )	r  r`  r  r   rz  r  rZ   r   r  r  r)   r)   r*   rnn_tanh_data?
  s&    
r  c                 C   s   t ||||  }|d|}|d  }	|d  }
|d  }|d  }|
| |	|  }||  }|d krv|nt ||d }||fS )Nr   r   r!   r"   rs   r   r|  chunkr   r   )ro  r  r  rt  rv  	hr_weight	chunk_dimZgatesZchunked_gatesZin_gateZforget_gateZ	cell_gateZout_gater  r  r)   r)   r*   	lstm_cellb
  s    r  c              
   C   s   |d }|d }|r|d nd }|r,|d nd }t |dkrD|d nt |dkrX|d nd }	|d d}
|d d}t| ||}|r|dn|}g }|D ](} t| |
||||	dd\}
}||
 q|r|  t	|d}||

d|
dffS )Nr   r!   r"   rs   rC  r   r  )r  rK   r   r|  r  r  r  rn  rZ   r   rF  )ro  rp  r]  r^  rn  rs  rt  ru  rv  r  r  r  r  rw  r   r)   r)   r*   one_layer_lstmp
  s$    *r  c              
   C   s  |d }|d }|r|d nd }|r,|d nd }	t |dkrD|d nt |dkrX|d nd }
g }g }|rp|d n|d }t| t|}|r|d d d }|d }|d }|dd||dd| }}|D ]} | jd }t| ||} ||k r:||d||| |d||| f |dd||dd| }}||krt	||d||| fd}t	||d||| fd}t
| ||||	|
dd\}}|}|| q|r|  ||f}n:|||f |  t| \}}t|dt|df}t|d}||fS )	Nr   r!   r"   rs   rC  r   rI   r  )r  rZ   r  r  r   r   r   r|  r  rl  r  rn  rD  r   )ro  rp  r]  r^  rr  rn  rs  rt  ru  rv  r  rw  rb  ri  rx  Zorig_hxZorig_cxr  r  r  ry  Zhidden0Zhidden1r   r)   r)   r*   one_layer_lstm_data
  sd    *  


  
r  c                 C   s    dd }|| ||rt S tS dS )a   Check whether we could use decompose lstm with mkldnn_rnn_layer.
    All the below conditions need to be met:
        * ``torch._C._has_mkldnn`` returns ``True``.
        * All the input args are on CPU.
        * The dtypes of args are either torch.float or torch.bfloat16.
        * Inference.
        * ``has_projections`` returns ``False``.

    Args:
        * input: the input sequence to LSTM
        * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM
        * params: the weight and bias tensors of LSTM
    c           	      S   s   t jjsdS | gt| tt| }dd |D }t|dkrFdS | }|t dkr`dS dd |D }|D ]}|t j	t j
fkrr dS qr| jrdS |d d|d dk}|rdS d	S )
NFc                 S   s   h | ]
}|j qS r)   r  r0   tr)   r)   r*   	<setcomp>
  s     zEselect_one_layer_lstm_function.<locals>.use_mkldnn.<locals>.<setcomp>r!   r  c                 S   s   h | ]
}|j qS r)   r   r  r)   r)   r*   r  
  s     r   r"   T)rZ   _CZ_has_mkldnnr  r   from_iterabler  popr   ro  Zbfloat16requires_gradr   )	r   r  r]  ZtensorsZdevicesr   Zdtypesr   r_  r)   r)   r*   
use_mkldnn
  s&    z2select_one_layer_lstm_function.<locals>.use_mkldnnN)r  r  )r   r  r]  r  r)   r)   r*   select_one_layer_lstm_function
  s    r  c	                 C   s   t |dkstdt|||d d|d dk}tt|d |d }	t| ||}
t| |	||||||||

\}}tt| }|t	|d dt	|d dfS )Nr"   lstm expects two hidden statesr   r!   )
r  r   r`  r   r  rD  r  r  rZ   r  )r   r  r]  r^  r  rv  rt  rc  r  rp  r  r   r  r)   r)   r*   	lstm_impl
  s$    $r  c	                 C   s   t |dkstdt|||d d|d dk}tt|d |d }	t| |	||||||dtt|d
\}
}tt| }|
t	
|d dt	
|d dfS )Nr"   r  r   r!   F)rr  )r  r   r`  r   r  rD  r  r   r  rZ   r  r  r)   r)   r*   lstm_data_impl  s"    $
r  c                 C   sr   |  dd}t||| dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nrs   r!   r"   r   )r  r   r|  r   r   ro  re  rs  ru  rt  rv  Zchunked_igatesZchunked_hgatesZ
reset_gateZ
input_gateZnew_gater)   r)   r*   gru_cell?  s    r  c                 C   s|   t | ||dd}t |||dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nrs   r!   r   r"   r  r  r)   r)   r*   gru_cell_dataH  s    r  c	                 C   sJ   t ||d}t| |d||||||dtt|td
\}	}
|	t|
dfS )NFr   r  )r`  r  r  r   rz  r  rZ   r  )r  rr  r  r]  r^  r  rv  rt  rc  r   r  r)   r)   r*   gru_impl_dataQ  s    r  c	                 C   sH   t ||d}t| |d|||||||tttd
\}	}
|	t|
dfS )NFr   r  )r`  r  r  r   r  r  rZ   r  )r   r  r]  r^  r  rv  rt  rc  r  r   r  r)   r)   r*   gru_implo  s    
r  c                 C   s:   t |  ||}t|d}t|d}tjj| ||||S rJ  )rD  r   rF  rZ   r  ro   _upsample_bilinear2d_aar   rB  align_cornersrC  rH  rL  rM  r)   r)   r*   upsample_bilinear2d_aa_vec  s    

    r  c                 C   s4   t |  ||}t|d}t|d}t| ||||S rJ  )rD  r   rF  upsample_bilinear2dr  r)   r)   r*   upsample_bilinear2d_vec  s    

r  )r   rB  r  rU  rV  rH   c           $      C   sf  | j \}}}}|d }	|d }
|	dkrV|r<|d |	d  }qZ|d k	rLd| n||	 }nd}|
dkr|rx|d |
d  }q|d k	rd| n||
 }nd}tj|	| j| jd}tj|
| j| jd}|r|| }|| }n0||d  d jdd}||d  d jdd}|tj}t|j|d dtj}|tj}t|j|d dtj}|	d}|	d}|	d}t
| d d ||g}t
| d d ||g}t
| d d ||g}t
| d d ||g}|| }d| }|| }d| }t||t|| } t||t|| }!t| |t|!| }"t| }#| jjd	krV|d
k rVtj}#|"j|#d}"|"S )Nr   r!   rX   r~   r  r   rt   rw   rW     r   )r   rZ   r!  r   r   rz   r6   r"  ceilrK   ro   rT  r   r=   rd  r  r   r  )$r   rB  r  rU  rV  Zn_batchrZ  Zin_hZin_wr]  r^  Zh_scale_factorZw_scale_factorr  r$  r1   rP   Zx_floorZx_ceilZy_floorZy_ceilZx_viewZx_floor_viewZx_ceil_viewZv1Zv2Zv3Zv4Zxscale2Zxscale1Zyscale2Zyscale1q1q2r   r   r)   r)   r*   r    sV    




r  )r  r  rH   c                 C   s   | j |j kS r4   r8  )r  r  r)   r)   r*   is_same_size  s    r  c                 G   s   t | |S r4   )ro   r  )r1   r   r?   r)   r)   r*   _reshape_alias  s    r  c                 C   s   t | |S r4   )ro   r  )r1   r  r)   r)   r*   _index  s    r  )rg   r   r   r   r   rH   c                 C   sV  |   }d}|dk rd}|d k	rX|dkrLdg| }|jd ||< ||}n|}| | } t||k|d}	|	|}
t| ||
| }t||k|d}|tj	j
kr|dkr| dd}||fS |d k	r
|| j}t|||
|}t||k|d}| }n||k | }|tjj
kr4| }n|tjj
krN| | }||fS )Nr!   r"   r   r)   r~   )rG   r   r  rZ   r[   rK   gatherrF  r    r&   rh   r   expandr   r6   r(   r'   )rg   r   r   r   r   Zn_dimsr   r   wr   Zsafe_target_r   r   Zwsumr)   r)   r*   _nll_loss_forward  s@    




r  c                 C   s   |   dkr|   dks td|  dks4td|   dkoJ|  dk}|s~| jd |jd ks~td| j d|j d| jd	 }|d ks|  dkr| |kstd
| d|j t| ||||S )Nr   r"   r   r!   r   r   r   r   rI   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rG   r   r   r   r  )rg   r   r   r   r   r   Z	n_classesr)   r)   r*   nll_loss_forward8  s(     



r  c                 C   s   t | ||||S r4   )r  )rg   r   r   r   r   r)   r)   r*   nll_loss2d_forwardS  s    r  )r1   ArH   c                 C   s    |d |  |d  |  |  d S )Nr"   rs   r!   r)   r1   r  r)   r)   r*   _upsample_cubic_convolution1`  s    r  c                 C   s(   ||  d|  |  d|  |  d|  S )NrC     r   r)   r  r)   r)   r*   _upsample_cubic_convolution2d  s    r  )r  rH   c                 C   s4   d}t | d |t| |td|  |t d|  |fS )Ng      rX   r   )r  r  )r  r  r)   r)   r*    _upsample_get_cubic_coefficientsh  s    r  )coeffstsrH   c                 C   s    t |}tdd t| |D S )Nc                 s   s   | ]\}}|| V  qd S r4   r)   )r0   r  r  r)   r)   r*   r)  t  s     z+_upsample_cubic_interp1d.<locals>.<genexpr>)r  _sum_tensorsrD  )r  r  Zcoeffs2r)   r)   r*   _upsample_cubic_interp1dr  s    r  )r  rH   c                 C   s   t tj| S r4   )r   rZ   add)r  r)   r)   r*   r  x  s    r  )	num_stepsr  r   r   c                 C   sB   | dkrt jd||dS |s(| d |  nd}t j| || ||dS )Nr!   r   rh  )Zstepsr   r   )rZ   r  Zlinspace)r  r  r   r   r  r)   r)   r*   _linspace_from_neg_one|  s    r  )thetahr  r  c           	      C   s   | j }| j}t||||d|d}t|||||dd}tjd||d}tjjj|dddd}tjjj|dddd}tjjj|d	ddd}|| | S )
Nr!   )r!   r!   r!   r  )r   r"   constantr   r:  r  rh   )r!   r!   )r"   r   	r   r   r  r  rZ   r  r!  r  r:  )	r  r  r  r  r   r   grid_xgrid_ygrid_oner)   r)   r*   _make_base_grid_4d  s    r  )r  r5  r  r  r  c                 C   s   | j }| j}t||||dd|d}t||||d|dd}t|||||ddd}	tjd||d}
tjjj|dddd}tjjj|dddd}tjjj|	d	ddd}	tjjj|
d
ddd}
|| |	 |
 S )Nr!   )r!   r!   r!   r!   r  )r   rs   r  r   r  rQ  )r"   r!   )rs   r   r  )r  r5  r  r  r  r   r   r  r  Zgrid_zr  r)   r)   r*   _make_base_grid_5d  s    r  r  r   r  c           	      C   sL   |\}}}}t | |||d}|ddd| jd d}||||dS )Nr  rI   rs   r!   r   r"   )r  r  r   rK   r   )	r  r   r  r  rL   r  r  	base_gridgridr)   r)   r*   _affine_grid_generator_4d  s     r  c           
      C   sR   |\}}}}}t | ||||d}|ddd| jd d}	|	||||dS )Nr  rI   r   r!   r   rs   )r  r  r   rK   r   )
r  r   r  r  rL   r5  r  r  r  r  r)   r)   r*   _affine_grid_generator_5d  s     r  c                 C   sD   t t|dkdd  t|dkr2t| ||dS t| ||dS d S )N)r   rC  c                   S   s   dS )NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r)   r)   r)   r)   r*   rl     rm   z'affine_grid_generator.<locals>.<lambda>r   r  )rZ   rn   r  r  r  r  r)   r)   r*   affine_grid_generator  s    
r  )r  r  interpolation_modepadding_moder  rH   c                    s  t dkfdd t dkfdd tttdfddttttdd	d
tttdfddtttdfdd}j\}
|j\}}tttd
fddt jjddddt j|jdd|dd ttttdfddtttd fdd|d }|d }	dkr||}
||	
}|
	 |	  d  }}d  }}|| }}||
 ||  }|
| ||  }||
 ||  }|
 |  }t
fdd|f|||f|||f|||ffD S dkrH||}
||	
}|
 }| }||dS |}
|	
}|
	 |	 |
 | }tttd
fdd 	ttd!	fd"d#tfd$dtd%D }t||dS d S )&N)r   r!   r"   c                      s
   d  S )NzInvalid interpolation mode r)   r)   )r  r)   r*   rl     rm   z!grid_sampler_2d.<locals>.<lambda>c                      s
   d  S )NzInvalid padding mode r)   r)   )r  r)   r*   rl     rm   )coordsr   rH   c                    s0    r|d d n|d }|d d }| | | S Nr   r)   )r  r   r   ofsr  r)   r*   unnormalize  s    z$grid_sampler_2d.<locals>.unnormalize)r  	twice_low
twice_highrH   c                 S   sv   ||krt | S |d }|| d }| |  }t ||}||  jt jd}t |d@ dk|| || | S )Nr"   r   r!   r   )rZ   r   r   fmodfloorr6   r   r[   )r  r  r  Z
coords_minZcoords_spanZcoords2extraZflipsr)   r)   r*   reflect_coordinates  s    

  
z,grid_sampler_2d.<locals>.reflect_coordinatesc                    sj   dkr| S dkr&t | d|d S  r@| dd|d  }n| dd| d }t |d|d S d S )Nr   r!   r"   rI   ry   )r  r   Zcoords_reflected)r  r  r  r)   r*   compute_coordinates  s    z,grid_sampler_2d.<locals>.compute_coordinatesc                    s   | |} ||S r4   r)   )r  r   Z	coords_un)r  r  r)   r*   compute_source_index  s    
z-grid_sampler_2d.<locals>.compute_source_index)xsysrH   c                    s,   t d| kt | k t d|k| k S rd   )rZ   rn  )r  r  )iHiWr)   r*   in_bounds_cond  s     z'grid_sampler_2d.<locals>.in_bounds_condr  r!   )r  r  wsrH   c                    s@   | | t  fdd| jtjd|jtjd|fD S )Nc                 3   s(   | ] }t |d  dV  qdS )r   r!   N)rZ   r[   r  r  )r  r0  oHoWr)   r*   r)    s   z0grid_sampler_2d.<locals>.clip.<locals>.<genexpr>r   )r7  r6   rZ   r"  )r  r  r  )r  r  r  r  )r0  r*   clip  s    
zgrid_sampler_2d.<locals>.clip)ixiyrH   c                    s&   | ||\}}} ||f | S r4   r)   )r  r  r  Zidx_xZidx_yZw_)C_idxN_idxr  r   r)   r*   get_summand  s    z$grid_sampler_2d.<locals>.get_summand).r   ).r!   r   c                 3   s    | ]\}}} |||V  qd S r4   r)   )r0   r  r  r  )r  r)   r*   r)  -  s   z"grid_sampler_2d.<locals>.<genexpr>c                    s     | } |}||dS rQ   r)   )r  r  r1   rP   )r  r  r  r  r)   r*   get_value_boundedH  s    

z*grid_sampler_2d.<locals>.get_value_bounded)r  rH   c                    sL   | d  } d | | d | d |f}t |dS )Nr!   r"   )r  rK   )r  Ziy_ofscs)r  ix_nwiy_nwtxr)   r*   	get_coeffM  s    z"grid_sampler_2d.<locals>.get_coeffc                 3   s   | ]} |V  qd S r4   r)   )r0   r  )r  r)   r*   r)  W  s     r   )rZ   rn   r   r  r   r!  r   r  r   r  r  roundr7  rJ   r  rK   )r  r  r  r  r  r  r  rL   r1   rP   r  r  Zix_neZiy_neZix_swZiy_swZix_seZiy_seZw_nwZw_neZw_swZw_seZ
ix_nearestZ
iy_nearesttyr  r)   )r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r	  r  r  r  r  r
  r  r*   grid_sampler_2d  sn    	
 




	





r  c                    s`   t   dko dk fdd t  ddk fdd   jddS )Nr"   r!   c                      s   d    d   S )Nzmatrix @ vector expected, got r  rk   r)   rg   r  r)   r*   rl   a  rm   zmv.<locals>.<lambda>r   c                      s*   d  d d  d d d dS )Nzsize mismatch, got input (r   r1   r!   z), vec (r   )r   r)   r  r)   r*   rl   e  rm   rk   )rZ   rn   rG   r   r   r  r)   r  r*   r  [  s    r  c                    s     rZ rB  r0t    S t  S n  rZt  S t dkot  dk fdd tj jk fdd  fdd}t	  	 k|   
 S )Nr!   c                      s   d   d    dS )Nz1D tensors expected, but got zD and z	D tensorsrk   r)   r   rg   r)   r*   rl   y  rm   zdot.<locals>.<lambda>c                      s   dj  d j  S )Nz:dot : expected both vectors to have same dtype, but found  and r   r)   r  r)   r*   rl   }  rm   c                	      s.   d   d    d   d    d	S )Nz+inconsistent tensor size, expected tensor [z] and src [z.] to have thesame number of elements, but got r  z elements respectivelyr  r)   r  r)   r*   numel_error  s    ,zdot.<locals>.numel_error)r  Zis_conjrZ   dotZconjZvdotrn   rG   r   r   r   )rg   r   r  r)   r  r*   r  j  s$    
r  c                 C   s   |   d}|d k	rV|d | d }d| |  ||  |  |    |   }n,d| |  | |  |  |     }|d k	r|| }t||S rJ  )r   rY   r   r   )rg   r   r   Z
pos_weightr   r   Z
log_weightr   r)   r)   r*    binary_cross_entropy_with_logits  s    
r  )tensor1tensor2rH   c                 C   s   | j |j kr| |fn|| f\}}|j dkr4|j dks8dS |jrBdS | j dkrPdS | dkr`dS |j}| }tdd t|d d |d	d
 |d	d
 D S )Nrs   r"   FTr   c                 s   s    | ]\}}}||| kV  qd S r4   r)   )r0   Zst1Zst2s2r)   r)   r*   r)    s   zshould_fold.<locals>.<genexpr>r   r!   rI   )r  r  r   r   r  r+  rD  )r  r  t1t2Zt1_shapeZ	t1_strider)   r)   r*   should_fold  s     
"r  c                 C   s  |   }|  }|dkr |dks$t|dkr@|dkr@t| |S |dkr\|dkr\t| |S |dkr|dkrttt| d|dS |dkr|dkrt| |S t| |rt||k}|r|j	n| }|s|n|dkr| 
 n| }|j}t|d d }ttj|}	|  dk}
|
r&||jd  ||	|d }|
r`|||}|r\|j	 S |S |||S n|dkrp|dkrp|dkr| dnd}| d}| jd d }|dkr|dn|d}|dkr|dnd}g }t|d D ]}||| q|dkr|dkr|d |d kr|d dkrd|  rdt| d|S |d dkr| rt| |dS tt||}|||g }t|}| ||||}|dk}|r||g }||||d}n |||g }|||||}|}|dkr.|| |dkrB|| |r^||d|S |||S ntddd	  d S )
Nr   r!   r"   rI   r   rs   Fc                   S   s   dS )Nz/both arguments to matmul need to be at least 1Dr)   r)   r)   r)   r*   rl   .  rm   zmatmul.<locals>.<lambda>)rG   r   rZ   r  r  rF  r  rK   r  r   r  r   r  r   operatorr   r  r   r  r  r   rJ   r  r   Zbroadcast_shapesrZ  r  Zbmmrn   )r  r  Zdim_tensor1Zdim_tensor2r  r  r  Zsizes_1Zoutput_shapeZfolded_dim1Zt2_is_matrixZ	t1_foldedr   r  m1Zbatch_tensor1m2r   Zbatch_tensor2r  Zexpand_batch_portionZtensor1_expand_sizeZexpand_batch_productZtensor1_expandedZ
vector_rhsZtensor2_expand_sizeZtensor2_expandedr)   r)   r*   r     s    	


  

 
  



r   )r  rB  r  rL  rM  rH   c                    s  j \}}|\}}ddd}	dd }
|	|||}|	|||}tj|jd|dddtj|jdd|dd tj|jddd|df}tj|jdddd|f}|
|||}| }|| |jtjd}|
|||}| }|| }|jtjd}|d ||d |d f}|d ||d |d f fd	d
fddtfdd|D }t	||}t
}|j|d}|S )Nc                 S   sD   |r |dkr| d |d  S dS |d k	r8|dkr8d| S | | S d S )Nr!   r   r)   )r  r  r  r`   r)   r)   r*   compute_scale=  s    z1upsample_bicubic2d_default.<locals>.compute_scalec                 S   s    |r| | S | |d  d S d S r  r)   )r`   Z	dst_indexr  r)   r)   r*   r  C  s    z8upsample_bicubic2d_default.<locals>.compute_source_indexr  r!   r   r"   c                    s8   t | dd }t |dd }t ||gS rJ  )rZ   rz   ro   rT  )r  r  Zy_idxZx_idx)r  r  r  r  r  r)   r*   load_bounded^  s    z0upsample_bicubic2d_default.<locals>.load_boundedc                    s"   t  fddD }t|S )Nc                 3   s   | ]} |V  qd S r4   r)   )r0   Zx_ofs)r  rP   r)   r*   r)  d  s     zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>)r7  r  )rP   Zcoeffs_x)ixs_ofsr  t_x)rP   r*   get_x_interpc  s    z0upsample_bicubic2d_default.<locals>.get_x_interpc                 3   s   | ]} |V  qd S r4   r)   )r0   Zy_ofs)r"  r)   r*   r)  g  s     z-upsample_bicubic2d_default.<locals>.<genexpr>r   )N)r   rZ   r!  r   r  r  r6   r"  r7  r  r=   rd  r  )r  rB  r  rL  rM  r  r  r  r  r  r  Zheight_scaleZwidth_scaleZout_yZout_xZreal_xZin_xr  Zreal_yZin_yZt_yr  Ziys_ofsZcoeffs_yr   r   r)   )	r  r  r  r"  r  r  r   r  r!  r*   upsample_bicubic2d_default1  s6    	


r#  )r  rB  r  rC  rH   c                 C   s   t t|t| dkdd  |d krd|d k	s4tttttf tdd t| j	dd  |D }|rl|nd\}}t
| ||||S )Nr!   c                   S   s   dS )Nz:Must specify exactly one of output_size and scale_factors.r)   r)   r)   r)   r*   rl   }  rm   z(upsample_bicubic2d_vec.<locals>.<lambda>c                 s   s"   | ]\}}t t|| V  qd S r4   )r   r   )r0   r  r`   r)   r)   r*   r)    s   z)upsample_bicubic2d_vec.<locals>.<genexpr>r"   ra  )rZ   rn   rw  r   r   r   r  r7  rD  r   r#  )r  rB  r  rC  rL  rM  r)   r)   r*   upsample_bicubic2d_vecp  s    
r$  ru   rx   r  c                C   s(   t j| ||d}t j| ||d}||fS )Nr  )rZ   aminr  )rg   rG   r  r%  r  r)   r)   r*   aminmax  s    r&  c                C   s"   t jtt| d| |||dS )Nr   r   )ro   r   rZ   r[   isnan)rg   rG   r  r   r)   r)   r*   nansum  s    r(  r   r  r   r  r   r   r  r   r  c             	   C   s   t jjd| d||||dS )Nr   r!   r)  ro   r!  Z
start_stepr*  r)   r)   r*   arange_default  s    
      r,  r   r   r   r  r   r  c             	   C   s   t jj| |d||||dS )Nr!   r)  r+  r-  r)   r)   r*   arange_start  s    
      r.  )r   r   r   marginr   r   rH   c           	         s  t t jd jd  t |dkp:|dkdd  t jdkoX dkfdd t jdko~ kfdd d k	rt t jdko  k fdd dt jdd	}||  }|	d}|dkr|n|| }d k	r$|  }t j
 jd
}t |k|d}|tjjkr\| S |tjjkr|| |jd  S |jddS d S )Nr   r!   r"   c                   S   s   dS )Nz only p == 1 and p == 2 supportedr)   r)   r)   r)   r*   rl     rm   z#multi_margin_loss.<locals>.<lambda>c                      s   d j  S NzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: r8  r)   )r   r)   r*   rl     rm   c                      s   d  dj  S )Nz#inconsistent target size, expected rS  r8  r)   )nframer   r)   r*   rl     rm   c                      s   d  dj  S )Nz#inconsistent weight size, expected rS  r8  r)   )rG   r   r)   r*   rl     rm   rG   r  r  rk   )rZ   
atleast_2dZ
atleast_1dr   rn   r  r   rK   r  r   r!  r   r[   r    r'   rh   r   r(   r   )	r   r   r   r/  r   r   ur\   r_  r)   )rG   r   r1  r   r   r*   multi_margin_loss  sB    








r5  	is_targetc                    s  | j  |j t| } t|}| j d }tt dko@|dk fdd ttdkod k fdd tj||jd}|dk}tjt|||dd	d
}||k }t||d}tj	| d|d}	t||d}
tj
||
jddkdd}d|	jjdd |  }|d}|| }t|d|}|tjjkrJ|jdd }n$|tjjkrb| }n|jdd}|| j}||fS )Nr!   r"   r   c                      s
   d  S r0  r)   r)   )orig_input_shaper)   r*   rl     rm   z0multilabel_margin_loss_forward.<locals>.<lambda>c                      s   d d  S )Nzinconsistent target size: z for input of size: r)   r)   r7  Zorig_target_shaper)   r*   rl     rm   r  rI   Tr  r2  rk   rX   )r   rI   )r   rZ   r3  rn   r  r!  r   r%  r[   r  anyrK   Tr   r    r'   rh   r   r   r(   r6   r   r   )r   r   r   rG   r_  Zis_endZend_idxZtarget_maskZtidx0r4  Ztidx1r6  r\   r)   r8  r*   multilabel_margin_loss_forward  s@    





r;  c                    s   t |  fdd}|S )Nc                     s    | |}| d  |S rd   )r   )r?   r@   r   outplace_opr)   r*   
inplace_op  s    
z$register_inplace.<locals>.inplace_opr   )Zaten_opr=  r>  r)   r<  r*   register_inplace  s    r?  )F)r   )FN)FN)r"   )r   NNr!   )N)r!   )r!   )rI   FF)r   )r   )r!   r!   )r!   r!   F)r!   r!   )r   )N)r~   rX   )r   r!   N)N)NN)NNN)F)F)F)F)F)NN)r   r   F)NN)N)NF(A  rD   r  r  enumr   r   r   	itertoolsr   r   typingr   r   r	   r
   r   r   r   rZ   Ztorch._primsr   r>  Ztorch._prims_commonZ_prims_commonr=   Ztorch.nn.functionalr!  r  r   r   r   r   Ztorch._decompr   r   r   r   r   Ztorch._prims_common.wrappersr   r   r   r   Z%torch.fx.experimental.symbolic_shapesr   r   Ztorch.utils._pytreer   r   r  ZDispatchKeyr   str__annotations__Z_opsr  ro   r    r~  rw  rF   r  Zcompute_only_pw_cast_for_opmathZpw_cast_for_opmathZINT_TO_FLOATZpw_cast_for_int_to_realr  rM   rS   rT   r]   ro  re   fillZScalarri   rq   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r  Zpy_implZAutogradCUDA	Generatorr   r   r   r   r   r   r   r'   rh   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  slicer  r  r  r  r  r  r#  rP  r`  re  rk  rr  rv  ZCompositeImplicitAutogradZAutogradru  r  r  Zrsubr  r  r  r  rZ  r  r  r  r  r  r  r  r  r  r  r  r  r  Zunsafe_chunkr  r  r  Zno_statsr  r  Z_fused_dropoutr  r  r  r  r   r   detachZliftZ
lift_freshr  r  r	  r  r  Z_adaptive_avg_pool2dr"  r*  r+  r)  r8  r9  r7  r:  r   r?  rD  rF  rG  r  rI  rK  rN  rO  rP  rR  r`  rh  rk  rm  rz  r  r  r  r  r  Zrnn_tanhr   r  Zrnn_relur  r  r  r  r  r  r  r  Zlstmr  r  r  r  Zgrur  r  r  r  r  r  r  r  Z_unsafe_viewrT  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   Zupsample_bicubic2dr#  r$  r&  r(  r!  r  r  r,  r   r.  r5  r;  r?  Zaddbmm_ZaddbmmZaddmm_Zaddmv_Zbaddbmm_ZbaddbmmZfill_Zgelu_r  Z
hardswish_Z	hardtanh_ZhardtanhZhardsigmoid___iand____and____ilshift__
__lshift__r1  r2  Zindex_reduce_Zindex_reduce__ior____or____irshift__
__rshift____ixor____xor__Zleaky_relu_r   Zlogit_ZlogitZrelu_r  Zrenorm_ZrenormZround_r  Zscatter_r   Zscatter_add_Zscatter_addZscatter_reduce_Zscatter_reduceZsilu_r)   r)   r)   r*   <module>   s   $
$  
      

  
	     
    

    
 *!	
   2            	P`    
  

    "    
VMR#



%c"j"    "
 


	
	
 
  .2)


 
 ?2

		


	

 N


5


   



"
   


  w 
=


,

0	