U
    9%eW                    @   sT&  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZmZmZ d dlZd dlZd dlZd dlm  mZ d dlmZmZmZmZmZmZmZmZmZm Z m!Z! d dl"m#Z#m$Z$ d dlm%Z% d dl&m'Z'm(Z(m)Z) d	d
l*m+Z+ ddl,m-Z-m.Z.m/Z/m0Z0 ddl1m2Z2m3Z3 ddl/m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z> ddlm?Z?m@Z@mAZAmBZB ddlCmDZDmEZE eFeGZHi ZIi ZJeK ZLejDjMZMejDjNZNejDjOZOeK ZPeK ZQdd ZRdd ZSdd ZTeSeMjUeMjVeMjWeMjXeMjYeMjZeMj[eMj\eMj]eMj^eMj_eMj`g ejaejbejcejdejeejfejgejhejiejjejiejkejldZmendddZodd Zpdd Zqedd d!Zrd"d# Zsd$d% Ztd&d' Zud(d) Zvd*ejwd*fd+d,Zxd-d. Zydd/d0Zzdd1d2Z{dd3d4Z|de<ej}d5d6d7Z~exeOjdde<ej}d5d8d9Zd*d:e<ej}d5d;d<ZexeMjj}dde<ej}d5d=d>Zd*d:e<ejd?d@dAZexeOjdde<ejd?dBdCZddDejwd*ddd*d*fdEdFZddGdHZexeMjd*ddIdJdK ZexeMjd*ddIdLdM ZexeMjeMjeMjeMjeOjgdNdO ZeeMdPrexeMje exeMjddddQdRZexeMjddddSdTZexeMjgddUdVZexeMjdWdX ZexeMjdYdZ ZexeMjd[d\ ZexeMjd]d^ ZexeMjd_d` ZexeMjdadb ZexeMjdddcdd ZexeOjdddedf ZexeMjdddgdh ZexeMjdidj ZexeMjddexeMjddexeMjdddkdl ZexeMjdddmdn ZexeMjddddpdqZexeMjdde fdrdsZexeMjUddddtduZUexeMjddddvdwZexeMjddddxdyZexeMjddzd{ZexeMjdddenenend|d}d~ZexeMjdddenenend|ddZexeMjdddenenend|ddZexeMjdddd ZexeMjdddddZexeMjdddddZexeMjdddddZexeMjdddd ZexeMjdddd ZexeMjdddd ZdddZexeMjdddZdd Ze  dddZe ddd ZejjdddZejjdddZdejjdddZdddZdd ZexejDjjdddd ZexeMjdddd ZexeMjdddd ZexeMjjdddd ZexeMj˃dd Ze ddd Zdd ZeeMj΃ZeeMjЃZeeMj҃ exeMj΃dd ZexeMjЃdd Zexe.jdddd Zexe.jddejdddZexe.jdddd Zexe.jdddd Zexe.jddd dÜeen e<eendĜddƄZexe.jddd dÜeneneen e<endǜddɄZexeMjddd*d*dʜe<e<ekekd˜dd̈́Zddτ Zddф Zddӄ ZddՄ ZdhZeeMje eeMjZe eeMje eeMje eeMjed*d׍ eeMjed*d׍ eeMje eeMje eeMj eeMj eeMj eeMj eeMje eeMj eeMj eeMj eeMj eeMj eeMj eeMjj eeMj eeMje eeMj eeMje eeMje eeMj eeMj eeMj eeMj  eeMj eeMj eeMj eeMj eeMjd*d׍ eeMjd*d׍ eeMj eeMj eeMj	 eeMj
 eeMj eeMjd*d׍ eeMjd*d׍ eeMj eeMj eeMj eeMj eeMj eeMj eeMj eeMj eeMj eeMj eeMjd*d׍ eeMjd*d׍ eeMj eeMj eeMj eeMj eeMj eeMj eeMj eeMj eeMj eeMj  eeMj! eeMj" eeMj# eeMj$ eeMj% eeMj& eeMj' eeMj( eeMj) eeMj* eeMj+ eeMj, eeMj- eeMj. eeMj/ eeMj0 eeMj1 eeMj2 eeMj3 eeMj4 eeMj5 eeMj6 eeMj7 eeMj8 eeMj9 eeMj: eeMj; eeMj< eeMj= eeMj> eeMj?d*d׍ eeMj@d*d׍ eeMjAd*d׍ eeMjB eeMjC eeMjD eeMjEd*d׍ eeMjF eeMjG eeMjHd*d׍ eeMjId*d׍ eeMjJd*d׍ eeMjK eeMjL eeMjM eeMjN eeMjO eeMjPd*d׍ eeMjQ eeMjR eeMjSd*d׍ eeMjTd*d׍ eeMjU eeMjV eeMjWd*d׍ eeMjXd*d׍ eeMjY eeMjZ eeMj[ eeMj\ eeMj] eeMj^ eeMj_ eeMj` eeMja eeMjb eeMjc eeMjd eeMje eeMjf eeMjgd*d׍ eeMjhji eeMjjjk eeMjljk eeMjm eeMjn eeMjo eeMjp eeMjqjr eeMjsjk eeMjtjk eeMju eeMjv eeMjw eeMjx eeMjy eeMjzjkd*d׍ eeMj{ eeMj|j} eej~jj eej~jj eeMjjkd*d׍ exeMjdddddلZexeMjddڜdd܄ZeeMd݃rexeMje exeOjdd߄ ZexeMjddenendddZexeMjdddddZdd ZexejeMjgdddd*dddZexejdddZexejdd ZexeMjdd Zdd ZexeMjdddd Zdd ZexejeMjgdddddddddZdd Zdd ZexeMjeeZeedZeed Zdd ZexeMjdddddd dZexeMjdddddddZexeMjdddddddZexeOjjkdd ZexejeMjgdd	 ZexeMjdddd
dZexeMjdddddZdd Zdd ZexeMjdddd ZexeMjdddd ZexeMjdddZexeMjdddZdd Zdd ZexeMjdddddZd d! ZexeMjdddd"d#ZexeMjddend$d%d&ZddDd'enee ekd(d)d*ZexeMjdddd+enee d,d-d.ZexeMjddend$d/d0ZexeMjddend$d1d2ZexeMjddend$d3d4ZexeMjdddDd5enekd6d7d8Zdeee d9f end:d;d<ZexeMjjkdee d=d>d?ZexeMj^jkdee ee d@dAdBZ^exeMjjkdee ee ee dCdDdEZdFdG ZexeMj_jkd ekee ee dHdIdJZexeMjdKdL ZexeMjdMdN ZexeOjjkdOdP ZexeMjddddQdRZejeejenf dSdTdUZejejdVdWdXZejejejdYdZd[Zdd]d^Zd_d` ZeeMj[ZexeMj[ddddadbZ[eeMj\ZexeMj\dddcdd Z\dedf ZŐdgdh ZeeMjǃZexeMjǃdidj ZexeMjɐjkddkdlZeeMjVZexeMjVddddndoZVeeMjWZexeMjWddddpdqZWdrds Z̐dtdu Z͐deܐdvdwdxZexeMjσddddydzZϐd{d| ZАd}d~ Zѐdd ZexeMjeOjgd	dd*dddZexeMjՃd
dd*dddZՐdd Ze{dd ZeeMj؃ZexeMjdDddd Zؐdd ZexeMjۃdd ZexeMjdddddZe{dd Ze{dd ZexeMjdDddddZexeMjgdDddd ZexeOjgdDddd ZexeMjeMjߐjgdDejdIeZexeMjeOjgdDddd ZexeMjdd ZexeMjeOjgdddddZexeMjdddddZexeMjdddZexeMjdddZexeMjdddZexeOjeΐd exeMjeΐdZexeMjeΐdZexeMjeΐdejedZexeMjeΐdejedZeeMjdDddZdd Zdd ZeeMjZeeMjZeeMjZeeMj Z eeMjZeeMjZeeMjZeeMjdDdZeeMj eeMj eeMj eeMjZeeMj	Z	eeMj
ddZ
eeMjZeeMjZeeMjZeeMj eeMjZexeMjejde eeMj eeMj eeMj eeMjH eeMjddDejkdZeeMjddDejkdZeeMjddDejkdZeeMjddDejkdZeeMjZeeMjZexeMje exeMje eeMjZeeMjZeeMj eeMjddZeeMj eeMj ejkd eeMj!ejkd eeMj"ejkd eeMj#ejkd eeMj$ejkdZ$eeMj%ejkd eeMj&ejkd eeMj' eeMj( eeMj) eeMj* eeMj+ eeMj, eeMj- eeMj. eeMj/ eeMj0 eeMj1 eeMj2 eeMj3 eeMj4 eeMj5 eeMj6jedDd eeMj6j7edDd eeMj8je eeMj8j7e eeMj9je eeMj9j7e eeMj:jke eeMj;j7e؃ eeMj;j<e؃ eeMj=je߃ eeMj=j7e߃ eeMj>e eeMj?je eeMj?j7e eeMj@e eeMjAe eeMjBe dd ZCeCeMjDe eCeMjEe eCeMjFe	 eCeMjGe
 eCeMjHe eCeMjIe eCeMjJe eCeMjKe eCeMjLje߃ eCeMjLjMe eCeMjNe eCeMjOe eCeMjPe eCeMjQe eCeMjRe eCeMjSe  eCeMjTe exeMjUe exeMjVe	 exeMjWe exeMjXe exeMjYe eCeMjZeMjU eCeMj[eMjV eCeMj\eMjW eCeMj]eMjX eCeMj^eMjY exeMj_dd Z_exeMj`dd Z`exeMjaddÄ ZaexeMjbdĐdń Zbe#c D ]"\ZdZeexe$edee $qexeMj˃dƐdǄ ZfexejDjgjhdȐdɄ Zizd dljZejDjkZkexekjldʐd˄ Zmexekjnd̐d̈́ Zoexekjpdΐdτ ZpexekjqdАdф ZqexekjrdҐdӄ ZrexekjsdԐdՄ Zsexekjtd֐dׄ ZtW n& euk
&r   eHvdء Y nX ddl,mwZw e+ew ddl,mxZx exy  dS (      N)defaultdict)Iterable)AnyListOptionalTupleUnion)canonicalize_dimcanonicalize_dimscheckdtype_to_typeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDis_boolean_dtypeis_float_dtypeis_integer_dtypeNumbertype_to_dtype)magic_methodsmethod_to_operator)tree_flatten)CeilDivFloorDivModularIndexing   )import_submodule   )configinductor_primsirtest_operators)decompositionsget_decompositions)
ExpandViewIndexingConstant	is_tritonops_wrapperPermuteView	Pointwise	ReductionSqueezeView	TensorBoxvalidate_irView)ceildivdecode_devicepad_listlikesympy_product)opsVc                 C   s   | st d| d S )Nzinductor does not support NotImplementedError)condmsg r8   W/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/torch/_inductor/lowering.py
assert_nyi>   s    r:   c                 C   sX   t | tttfrdd | D S t|  t | tjjrT| 	 D ]}tt
| | q>d S )Nc                 S   s   g | ]}t |qS r8   )add_needs_realized_inputs.0xr8   r8   r9   
<listcomp>E   s     z-add_needs_realized_inputs.<locals>.<listcomp>)
isinstancelisttuplesetneeds_realized_inputsaddtorch_opsOpOverloadPacket	overloadsgetattr)fnoverloadr8   r8   r9   r;   C   s    
r;   c                 C   s8   t | tjjr,|  D ]}|tt| |< qn|t| < d S N)r@   rF   rG   rH   rI   layout_constraintsrJ   )rK   
constraintrL   r8   r8   r9   add_layout_constraintL   s    rP   )r   r   r                     	   
         dtypec                 C   s2   t | ts| S | tks&td|  dt|  } | S )Nzid z missing from DTYPE_ID_LOOKUP)r@   intDTYPE_ID_LOOKUPAssertionErrorr[   r8   r8   r9   decode_dtype~   s
    
r`   c                 C   sF   t | tr"t|  p t|  S t | tjr8| jdkS t | tS d S NT)	r@   r+   r   	get_dtyper   sympySymbol
is_integerr]   r>   r8   r8   r9   is_integer_type   s
    

rg   c                 C   s$   t | trt|  S t | tS d S rM   )r@   r+   r   rb   boolrf   r8   r8   r9   is_boolean_type   s    
ri   type_promotion_kindc                    s0   dd   fdd|D }t |d| i\}}|S )Nc                 S   sJ   t | ttjfr| S t| ds"tt|  }tj	dg| | 
 dS d S )Nrb   r   r[   )r@   r   rc   rd   hasattrr_   lenget_sizerF   zerosrb   )inpdimr8   r8   r9   construct_input   s
    z+get_promoted_dtype.<locals>.construct_inputc                    s   g | ]} |qS r8   r8   )r=   argrr   r8   r9   r?      s     z&get_promoted_dtype.<locals>.<listcomp>rk   )r   )rk   argsZinps_r\   r8   rt   r9   get_promoted_dtype   s    	rw   c                 C   sh   t | ttfs| g} nt| } t| D ]<}t |tjjr&| D ] }t||}|tkr@| 	| q@q&| S rM   )
r@   rA   rB   rF   rG   rH   rI   rJ   	loweringsappend)aten_fnrK   rL   Zother_fnr8   r8   r9   get_overloads   s    
r{   c                    s   dd t  D |s|rhrh|r*tjndd  D }t|d|i fddfdd D  |rrtt fddD  D ]\}}| |< qtt D ]8}t | t	j
rt | t d	    |< q S )
Nc                 S   s   g | ]\}}t |tr|qS r8   r@   r+   r=   ir>   r8   r8   r9   r?      s     
 z"transform_args.<locals>.<listcomp>c                 S   s$   g | ]}t |tst|d r|qS rb   )r@   r   rl   r=   ar8   r8   r9   r?      s    
 
 rk   c                    sD   t | trt| S t | tjr<t| j d   S | S d S Nr   )r@   r+   to_dtyper   Constantvalue
get_device)rs   )ru   r\   indicesr8   r9   promote   s
    

ztransform_args.<locals>.promotec                    s   g | ]} |qS r8   r8   r   )r   r8   r9   r?      s     c                    s   g | ]} | qS r8   r8   r=   r~   ru   r8   r9   r?      s     r   )	enumeraterF   rh   rw   zipbroadcast_tensorsrangerm   r@   r   r   r#   createrA   rn   )ru   	broadcastrk   convert_input_to_boolZpromoting_argsr~   r>   r8   )ru   r\   r   r   r9   transform_args   s*    $
&r   c                    sD   t   fddt| }t| tfdd|D  S )a  
    Add a foreach lowering to lowerings dict.

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                     s&   t | dkst | |}t| |S )Nr   )rm   r_   r,   )ru   kwargsout)	decomp_fnr8   r9   wrapped   s    
z+_register_foreach_lowering.<locals>.wrappedc                    s   i | ]
}| qS r8   r8   r=   rK   r   r8   r9   
<dictcomp>   s      z._register_foreach_lowering.<locals>.<dictcomp>)	functoolswrapsr{   foreach_opsupdaterx   )rz   r   Zaten_fnsr8   )r   r   r9   _register_foreach_lowering   s    
r   c                    sB   t  fddt  tfdd D  S )a  
    Add a lowering to lowerings dict

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                     s   t | } d}t| dkr6t| d t tfr6d}| d } tdd | D rTtdtdd | D rtd	d  D stt	| } |r| g} | |}t
| |S )
NFr   r   Tc                 s   s   | ]}|d kV  qdS )r   Nr8   r<   r8   r8   r9   	<genexpr>  s    z6_register_lowering.<locals>.wrapped.<locals>.<genexpr>zout= ops aren't yet supportedc                 s   s   | ]}t |tV  qd S rM   r|   r<   r8   r8   r9   r     s     c                 s   s   | ]}|t kV  qd S rM   )	fallbacksr   r8   r8   r9   r     s    )rA   rm   r@   rB   anykeysr_   valuesallr   r,   )ru   r   unpackedr   )rz   r   r   r   rk   r8   r9   r      s0    
   
z#_register_lowering.<locals>.wrappedc                    s   i | ]
}| qS r8   r8   r   r   r8   r9   r     s      z&_register_lowering.<locals>.<dictcomp>)r   r   r{   rx   r   )rz   r   r   rk   r   r8   )rz   r   r   r   rk   r   r9   _register_lowering   s
    r   Fc                 C   s   t jt| |||dS )z+
    Shim to support decorator syntax.
    r   rk   r   )r   partialr   )rz   r   rk   r   r8   r8   r9   register_lowering"  s    	r   c                 C   s   g }t jt| t|tddD ]v\} }|dkr>||  q"| dkrR|| q"tjj	| | t
t|jt
t| jk r|| q"||  q"tt|S )z
    Broadcasting logic based on symbolic shapes.

    We give the shapes 0 and 1 concrete values, while all other shapes
    are symbolic sympy formulas.
    r   )	fillvalue)	itertoolszip_longestreversedrc   Integerry   r3   graphsizevarsguard_equalsrm   expandfree_symbolsrB   )r   boutputr8   r8   r9   broadcast_symbolic_shapes4  s       r   c                    s   t dd | D s| S tdd | D rZ|p:t| dtjifdd  fdd| D S td	d | D }g }| D ]r}t|ttfr|	t
t|| | t|  qtt|tjr|	t|| |  qt|	| qt|S )
Nc                 s   s    | ]}t |tjttfV  qd S rM   )r@   rc   Exprr]   floatr<   r8   r8   r9   r   M  s     z$promote_constants.<locals>.<genexpr>c                 s   s    | ]}t |tttjfV  qd S rM   )r@   r]   r   rc   rd   r<   r8   r8   r9   r   O  s     rk   c                    s4   t | tjrt|  td S t|  td S d S rM   )r@   rc   rd   r   r$   r/   r   rf   r[   r8   r9   
const_funcT  s    z%promote_constants.<locals>.const_funcc                    s   g | ]} |qS r8   r8   r<   )r   r8   r9   r?   Z  s     z%promote_constants.<locals>.<listcomp>c                 s   s    | ]}t |ttfr|V  qd S rM   )r@   r+   r#   r<   r8   r8   r9   r   [  s      )r   r   rw   r   DEFAULTnextr@   r]   r   ry   r#   r   r   r   rb   r   rA   rn   rc   r   r$   )inputsoverride_return_dtypeexr   r>   r8   )r   r\   r9   promote_constantsL  s0     
r   c                    s*   d dt t d fdd}|S )Nalphar   c              	      sL  t |	}r:| d k	rF| dkrFt|}t|d | |d< n| d ksFtdd |D |d  	pn|d   t|d  jdk|dd  D ]B}t	|t
jstt| kstd d d|  q fd	d
}s0d }|D ] }| jdkr| } qq|s0|d  }p8|}tj| |dS )Nr   c                 S   s   g | ]}|  qS r8   make_loaderr<   r8   r8   r9   r?   |  s     z1make_pointwise.<locals>.inner.<locals>.<listcomp>r   cudazndim mismatch  c                    s   t  t ks$td  d tjkrLd k	rL fddD  S rtrttjkrt fddD  S  fddD  S d S )Nzwrong ndim r   c                    s   g | ]}| qS r8   r8   r=   loadindexr8   r9   r?     s     zCmake_pointwise.<locals>.inner.<locals>.inner_fn.<locals>.<listcomp>c                    s   g | ]}| qS r8   r8   r   r   r8   r9   r?     s     c                    s   g | ]}| qS r8   r8   r   r   r8   r9   r?     s     )rm   r_   rF   rh   float64r   )r\   rK   is_cudaloadersoverride_fn_when_cuda_float64override_fn_when_input_boolrangesr   r9   inner_fn  s    $z/make_pointwise.<locals>.inner.<locals>.inner_fndevicer\   r   r   )r   rA   mulr_   rn   rb   r/   r   typer@   r   BaseConstantrm   r(   r   )r   r   otherr   r   r~   allow_alpharK   override_devicer   r   r   )r\   r   r   r   r9   innert  s@    
	
zmake_pointwise.<locals>.innerr   r+   )rK   r   r   r   r   r   r   r8   r   r9   make_pointwisel  s    &-r   c                    s&   ddt t t  d fdd}|S )Nr   r   r   c                    sp  dd   fdd}d}t jjjD ](}|jD ]}|jdkrD|jtks,d}q,q"d }|D ]}t|tt	frT|} qpqT|d k	st
dg }|D ]2}t|tt	fs||gt|  q|| q|t| }	d gt| }
|	 D ]t\\}}}g }|D ]N\}}r|d	| i}n| }||
|< |jd
kr|r|r||  q|rt j| qtdd |
D slt
|
S )Nc                  W   s   t dd | D S )Nc                 s   s0   | ](}t |to&td d |j D V  qdS )c                 s   s   | ]}|j V  qd S rM   )r   r<   r8   r8   r9   r     s     zVmake_foreach_pointwise.<locals>.inner.<locals>.is_dynamic.<locals>.<genexpr>.<genexpr>N)r@   r+   r   datarn   )r=   tr8   r8   r9   r     s   
zLmake_foreach_pointwise.<locals>.inner.<locals>.is_dynamic.<locals>.<genexpr>)r   r   r8   r8   r9   
is_dynamic  s    z9make_foreach_pointwise.<locals>.inner.<locals>.is_dynamicc                    st   t t}t| D ]^\}} |  }d }|D ]}t|tr*|j } qHq*|d k	sXtd|||f ||f q|S )Nz.foreach op should have at least one tensor arg)	r   rA   r   r@   r+   r   r   r_   ry   )Z	arg_pairsr   r~   ru   use_foreachr   r   r   r8   r9   
group_args  s    


z9make_foreach_pointwise.<locals>.inner.<locals>.group_argsFZcall_functionTz1at least one input must be a list to a foreach opr   r   c                 s   s   | ]}|d k	V  qd S rM   r8   r<   r8   r8   r9   r     s     z8make_foreach_pointwise.<locals>.inner.<locals>.<genexpr>)r3   r   Zcurrent_nodeZusersoptargetr   r@   rA   rB   r_   ry   rm   r   itemsr   realizeZregister_listr   )r   r   r   Zrealize_outputsnodeuserZa_list_inputinputZbroadcast_inputsgroupsoutputsr   r   groupZbuffer_listZ
output_indru   r   r   pw_fnr   r9   r     sN    	
z%make_foreach_pointwise.<locals>.innerr   )r   r   r   r8   r   r9   make_foreach_pointwise  s    "Ir   r>   r\   c                    s8   |    kr|rt| S | S  fdd}t| d| S )Nc                    s   t |  S rM   )r2   r   rf   r[   r8   r9   	_to_dtype  s    zto_dtype.<locals>._to_dtyper   )rb   cloner   )r>   r\   copyr   r8   r[   r9   r     s    r   c                 C   s   t | |ddS NTr   r   r   r8   r8   r9   _convert_element_type  s    r   r   c                   sv   |    kr|rt| S | S dd }||   }| }||krZtd|    d  d fdd}t| d| S )	Nc                 S   s"   | j rt| jS t| jS d S rM   )Zis_floating_pointrF   Zfinfobitsiinfor[   r8   r8   r9   _get_primitive_bitwidth  s    z1to_dtype_bitcast.<locals>._get_primitive_bitwidthzbitcast z to different bitwidth type z is not supported yet.c                    s   t |  S rM   )r2   to_dtype_bitcastrf   r[   r8   r9   _to_dtype_bitcast  s    z+to_dtype_bitcast.<locals>._to_dtype_bitcastr   )rb   r   r5   r   )r>   r\   r   r   Zsrc_bitsZdst_bitsr   r8   r[   r9   r      s    r   c                 C   s   t | |ddS r   )r   r   r8   r8   r9   _view_dtype  s    r   r>   r   c                C   s8   t |}|  |kr$|r t| S | S ttj| |S rM   )r/   r   r   r+   r   r   Z
DeviceCopy)r>   r   r   r8   r8   r9   	to_device  s    r  c                 C   s   t | |ddS r   )r  r  r8   r8   r9   _device_put#  s    r  Tc	                 C   s   |p| j }t|}	|r"td| }
|dk	r2t|}t|	|||rB|
nd|d}	t| |||d|	}	tt|rttt|d|d|	 |	S )z3A pointwise function that maps ops.{name} to inputsZ
libdevice_N)r   r   r   r   r   )rk   r   )__name__r&   r   r   rl   primsrJ   )rz   namer   rk   r   r   r   r   use_libdevice_for_f64rK   Zfn_libdevicer8   r8   r9   register_pointwise(  s<    


r  c                 C   s   t ||d}t| |}|S )Nr   )r   r   )rz   Zpointwise_lowering_fnr   rK   r8   r8   r9   register_foreach_pointwiseR  s    
r
  )r   rk   c                    s  dd }t |ttfr"t||}t |ttfr<t||}| ||g t d  d tjd}dd t D }t|t	 fdd|D  D ]\}}| |< qt
t D ]8}t  | tjrt | t |d	    |< qt||d
 d	 t d |t d |S )Nc                  W   s
   t j|  S rM   )r2   wherer   r8   r8   r9   rK   ^  s    zwhere.<locals>.fnr   r   rj   c                 S   s   g | ]\}}t |tr|qS r8   r|   r}   r8   r8   r9   r?   j  s     
 zwhere.<locals>.<listcomp>c                    s   g | ]} | qS r8   r8   r   r   r8   r9   r?   k  s     r   r   )r@   r   r]   constant_likerw   r   r   r   r   r   r   rm   r   r   r#   r   rA   rn   r   r   )r6   r   r   rK   r\   r   r~   r>   r8   r   r9   r  \  s,    
  $
&
  r  c                  G   s   t | dkr*t| d ttfr*t| d  S ttdd | D g }g }| D ]H}| }t |t |ks~t	dd t
||D rt||}|| qJ|S )Nr   r   c                 S   s   g | ]}|  qS r8   rn   r<   r8   r8   r9   r?   z  s     z%broadcast_tensors.<locals>.<listcomp>c                 s   s2   | ]*\}}|d kr|d kp(|d ko(|d kV  qdS r   Nr8   r=   r   r   r8   r8   r9   r     s    z$broadcast_tensors.<locals>.<genexpr>)rm   r@   rA   rB   r   r   reducer   rn   r   r   r   ry   )r   r   r   r>   sizesr8   r8   r9   r   u  s       
r   c                 C   s   | S rM   r8   rf   r8   r8   r9   nop  s    r  
lift_freshc                 C   s   t | tst|d kr&tt| jS tt|  |}t	t |t
sJ|fn|}g }t|  D ]0\}}||krtjjt|ds`|| q`||  krt| |S | S Nr   )r@   r+   r_   r*   r   r   r
   rm   rn   rC   rB   r   r3   r   r   evaluate_exprrc   Eqry   view)r>   rq   dims	new_shapedsr8   r8   r9   squeeze  s    r  c                 C   s   t t| |S rM   )r   r  )r>   rq   r8   r8   r9   squeeze_copy  s    r  c                 C   s2   t | |}t| tstt|ts&t|j| _| S rM   )r  r@   r+   r_   r   r>   rq   valr8   r8   r9   squeeze_  s
    
r   c                 C   s2   t | rt| dtjdS td}t|tjd| S )NFr[   isinfr   rg   	full_likerF   rh   r&   r   r>   rK   r8   r8   r9   r!    s    r!  c                 C   s2   t | rt| dtjdS td}t|tjd| S )NFr[   isnanr   r"  r$  r8   r8   r9   r%    s    r%  c                 C   s$   t | rt| S td}t|| S )Nceilrg   r   r&   r   r$  r8   r8   r9   r&    s    r&  c                 C   s$   t | rt| S td}t|| S )Nfloorr'  r$  r8   r8   r9   r(    s    r(  c                 C   s$   t | rt| S td}t|| S )Nroundr'  r$  r8   r8   r9   r)    s    r)  c                 C   s$   t | rt| S td}t|| S )Ntruncr'  r$  r8   r8   r9   r*    s    r*  c                 C   s   t | g\} t| tjr(t| t|S t| ts6tt|t	tfsHtt| 
 t|kr`| S tjjt| 
 }|dkr| tjjt||  tt| jt|S r   )r   r@   r   r   r#   r   rB   r+   r_   rA   rn   r3   r   r   	size_hintr1   
mark_reuser   )r>   r  Zx_size_productr8   r8   r9   r     s    r   c                 C   sL   t |}|D ]}d||< q| }t|D ]\}}|dkr&t||}q&t||S )Nr   )rA   r   	unsqueezer   )r   shapeZbroadcast_dimensionsr  Zbroadcast_dimensionvidxr>   r8   r8   r9   broadcast_in_dim  s    
r1  c                 C   s   t | | S rM   )r   rn   r>   yr8   r8   r9   	expand_as  s    r4  c                    sR  t |   tt krJtdgtt      t| t  } tt|  ksbtt |  }d}ttD ](}| dkrd}|| |  ||< q~|rt|| 	 | 
 dS tdd t D rt| |S  fdd	}tjjt }|dkr,| tjjt||  |  tj| 
 | 	 |t |d
S )Nr   Fr   Tr\   r   c                 s   s"   | ]\}}|d kp|d kV  qdS r  r8   r  r8   r8   r9   r     s     zrepeat.<locals>.<genexpr>c                    sv   t | t kstt| } tt D ]D}| dkr( | dkrTtd| |< q(t| | d | | |< q(| S Nr   r   )rm   r_   rA   r   rc   r   r   )r   r~   Zold_sizerepeatsx_loaderr8   r9   r     s    zrepeat.<locals>.inner_fnr   )rA   rn   rm   rc   r   r  r_   r   emptyrb   r   r   r   r   r3   r   r   r+  r1   r,  r   r(   r   )r>   r8  new_sizeZzero_tensorr~   r   Zold_size_productr8   r7  r9   repeat  s8     

r<  c                 C   s2   t | tstt |ttfs ttt| j|S rM   )r@   r+   r_   rA   rB   r-   r   r   )r>   r  r8   r8   r9   r  4  s    r  c                 C   s6   t | tstt |ttfs ttt| jt|S rM   )r@   r+   r_   rA   rB   r'   r   r   )r>   r  r8   r8   r9   permute=  s    r=              c                 C   s|   t | tstt| |d}|  | }tjjt	
|| drDd}tjjt	
|| drbd}ttj| j||||S r   )r@   r+   r_   _validate_dimrn   r3   r   r   r  rc   Ltr   Z	SliceViewr   r   )r>   rq   startendstepdim_sizer8   r8   r9   slice_D  s    rE  c                    sv  t |ts|f}t |ts |f} fdd|D }t  dkrJt S t|}t|}|dksl|dkr|dkr|td|dkr|dkrt t  g}t||d}t|t	  S ||krtd| d| |dd }|dd }t |d |d }	t|	||S |\t
jj   |d     fd	d
}
tj    |
  dS )z
    This is based on torch._refs.roll(), but uses ModularIndexing().

    We can't use the ref here because it is based on multiple calls to
    torch.cat() that this will result in terrible code.
    c                    s   g | ]}t  |qS r8   )r?  r=   r  r   r8   r9   r?   ]  s     zroll.<locals>.<listcomp>r   r   z`shifts` requiredz*shifts and dimensions must align. shifts: z, dims: Nc                    s4   t | } t|   tdt| <  | S r  )rA   r   rc   r   r   r   )a_loaderrq   sizerA  r8   r9   rK   |  s    
  zroll.<locals>.fnr   )r@   r   r1   rn   r   rm   RuntimeErrorr  rollrA   r3   r   r   evaluate_static_shaper   r(   r   r   rb   )r   Zshiftsr  Z
len_shiftsZlen_dimsZflatZrolledZtail_shiftsZ	tail_dimsZfirst_dim_rolledrK   r8   )r   rH  rq   rI  rA  r9   rK  P  sF    	

rK  c              	   C   s   t | tr"t | jtjr"| j } |   t| sDtd|  dt	| \}}t
|j|jdd |D dd |D t|pd}tt||S )Nzunrealized as_strided(z, ...)c                 S   s   g | ]}t |qS r8   rc   r   r=   r  r8   r8   r9   r?     s     zas_strided.<locals>.<listcomp>c                 S   s   g | ]}t |qS r8   rM  rN  r8   r8   r9   r?     s     r   )r@   r+   r   r   ZBaseViewZunwrap_viewr   Zis_storage_and_layoutr5   Zas_storage_and_layoutFixedLayoutr   r\   rc   r   ZReinterpretView)r>   rI  stridestorage_offsetZstorageZ
old_layoutZ
new_layoutr8   r8   r9   
as_strided  s    

rR  c                 C   s$   t | tstt| |||j| _| S rM   )r@   r+   r_   rR  r   )r>   rI  rP  rQ  r8   r8   r9   as_strided_  s    rS  c                 C   s   t | |||}t|S rM   )rR  r   )r>   rI  rP  rQ  resultr8   r8   r9   as_strided_copy  s    rU  c                    s   t dd | D rZ| D ]}|  qt dd | D rJttjf|  \} }ttj| |S t| dkrrt| d S t| d |d}t	| dt
ji  fdd| D } ttj| |S )	Nc                 s   s   | ]}|  tjkV  qd S rM   )rb   rF   uint8r=   r   r8   r8   r9   r     s     zcat.<locals>.<genexpr>c                 s   s   | ]}t |jjd kV  qdS )rR   N)rm   layoutrI  rW  r8   r8   r9   r     s     r   r   rk   c                    s   g | ]}t | qS r8   r   )r=   rp   r[   r8   r9   r?     s     zcat.<locals>.<listcomp>)r   r   require_channels_lastatencatfallback_handlerrm   r   r?  rw   r   r   r+   r   ZConcatKernelr   )r   rq   r   rv   r8   r[   r9   r[    s     
r[  )offsetdim1dim2c                    s   |   ttdtdtkfdd tjjt	|d}|rxt
t |  d}nt
t  | d}d |r| df nd|f fddtD }||  fdd	}ttj| ||S )
N)r0  Zrankc                      s   d  d S )Nz(diagonal dimensions cannot be identical z, r8   r8   r^  r_  r8   r9   <lambda>      zdiagonal.<locals>.<lambda>r   )r   r   c                    s    g | ]\}}| fkr|qS r8   r8   )r=   r~   r  r`  r8   r9   r?     s      zdiagonal.<locals>.<listcomp>c                    s   | d }dgt  }d}tD ]L}|kr@| d  ||< q"|krZ| d  ||< q"| | ||< |d7 }q"|t d kst|S )Nr   r   r   r   )rm   r   r_   )r0  Zdiag_idxZoriginal_idxZcur_dimr  Zbase_idxr^  r_  Znum_dimsZoriginal_shaper8   r9   	reindexer  s    
zdiagonal.<locals>.reindexer)rn   rm   r	   r   r3   r   r   r  rc   r@  maxminr   ry   r+   r   GenericViewr   )r   r]  r^  r_  Zoffset_negativeZ	diag_sizer  rd  r8   rc  r9   diagonal  s(     
rh  c                 C   s   t t| |||S rM   )r   rh  )r   r]  r^  r_  r8   r8   r9   diagonal_copy  s    ri  c                 C   s$   t | }t||||}t|| |S rM   )r   rh  	mutate_to)r   srcr]  r^  r_  r   r   r8   r8   r9   diagonal_scatter  s    
rl  c                 C   s,   t ||  | }tt| |||d |S r  )r-   Zhandle_negative_indexrn   r  rE  )r>   rq   r0  r8   r8   r9   select  s    rm  c                 C   s   t | |d}tjj|  | }t|tjr<tjj|}t|t	tj
frb|g|| d |  }g }d}|D ]$}|| }|t| ||| |}qn|S Nr   r   )r?  r3   r   r   rL  rn   r@   rc   r   r]   r   ry   rE  )r>   r  rq   x_sizerT  rA  rI  rB  r8   r8   r9   split  s    rp  c                 C   s   t | ||S rM   )rp  )r>   r  rq   r8   r8   r9   split_with_sizes  s    rq  c                 C   sJ   t | |d}tjj|  | }g }t|D ]}|t| || q.|S r   )	r?  r3   r   r   rL  rn   r   ry   rm  )r>   rq   ro  rT  r~   r8   r8   r9   unbind  s    rr  c           
         s   |   }t|}t|| |dkr4tt| d|dS tjj}|||   |	d t
|  | d }| |t|| |   |d   |f| d d  |f} fdd}	ttj| ||	S )Nr   )rB  r   c                    s4   | d |     }| d   |f|  d d S )Nr   r   r8   )r0  Zdim_idxrq   rC  r8   r9   rd  5  s    zunfold.<locals>.reindexer)rn   rm   r	   rE  r-  r3   r   r   	guard_leqguard_ltr   r,  r+  r   r+   r   rg  r   )
r>   	dimensionrI  rC  r  ndimr   Znew_dim_sizeout_sizerd  r8   rs  r9   unfold#  s    
$ry  c                 C   s4   t | |d}t|  }||td t| |S r  )r?  rA   rn   insertrc   r   r  )r>   rq   r  r8   r8   r9   r-  <  s    r-  c                 C   s2   t | |}t| tstt|ts&t|j| _| S rM   )r-  r@   r+   r_   r   r  r8   r8   r9   
unsqueeze_D  s
    
r{  c                 C   sP   t |tstt|  }|dk r.||| 7 }d|  krF|| k sLn t|S r   )r@   r]   r_   rm   rn   )r>   rq   r]  rw  r8   r8   r9   r?  M  s    r?  r   c                 C   sT   t | |d}tjj|  | d }t| |d|}t| |||d }t|t|S )Nr   r   )	r?  r3   r   r   rL  rn   rE  r   sigmoid)r>   rq   Znew_lenr   r   r8   r8   r9   gluV  s
    r}  c                  C   s
  t jjrt jjjt jjjt jjjt jjjt	j
jt jjjg} tt jjjtttddd}tt jjjjttttddd}tt jjjjttttddd}tt jjjtttd	d
d}tt jjjjttttddd}tt jjjtttddd}tt	j
jtttttttttt tttttttddd}tt jjjd dtttttddd}tt jjjjd dttttttddd}	tt jjjd dtttttddd}
t jjr| t jjj tt jjjttttddd}t|  n d S ) N)r>   weightbiasc
           
      S   s$   t tj| |||||||||	
S rM   )r+   r   r   ZConvolutionUnary)
r>   r~  r  paddingrP  dilationr   attrscalars	algorithmr8   r8   r9   convolution_unaryk  s    z5register_onednn_fusion_ops.<locals>.convolution_unary)r>   r   r~  r  c                 S   s*   t tj| |||||||||	|
||S rM   )r+   r   r   ZConvolutionBinaryr>   r   r~  r  r  rP  r  r   binary_attrZbinary_alpha
unary_attrunary_scalarsZunary_algorithmr8   r8   r9   convolution_binary  s"    z6register_onednn_fusion_ops.<locals>.convolution_binaryc                 S   s*   t tj| |||||||||	|
||S rM   )r+   r   r   ZConvolutionBinaryInplacer  r8   r8   r9   convolution_binary_inplace  s"    z>register_onednn_fusion_ops.<locals>.convolution_binary_inplace)r>   wr   c              
   S   s   t tj| |||||S rM   )r+   r   r   ZLinearUnary)r>   r  r   r  r  r  r8   r8   r9   linear_unary  s    z0register_onednn_fusion_ops.<locals>.linear_unary)r>   r3  r  r   c              	   S   s   t tj| ||||S rM   )r+   r   r   ZLinearBinary)r>   r3  r  r   r  r8   r8   r9   linear_binary  s    z1register_onednn_fusion_ops.<locals>.linear_binaryc                 S   s&   t tj| |||||||||	|
S rM   )r+   r   r   ZConvolutionTransposeUnary)r>   r~  r  r  Zoutput_paddingrP  r  r   r  r  r  r8   r8   r9   convolution_transpose_unary  s    z?register_onednn_fusion_ops.<locals>.convolution_transpose_unaryr>   Zw0Zw1Zw2Zw3hxZcxreverseZbatch_sizesmodeZhidden_sizeZ
num_layersZ
has_biasesbidirectionalZbatch_firsttrainc                 S   s4   t tjtj| |||||||||	|
|||||S rM   )pytreetree_mapr+   r   r   ZMkldnnRnnLayerr  r8   r8   r9   mkldnn_rnn_layer  s*    z4register_onednn_fusion_ops.<locals>.mkldnn_rnn_layerrj   )r>   packed_weightw_scalew_zpr  c                 S   s2   t tj| |||||||||	|
||||||S rM   )r+   r   r   ZQConvPointWisePT2E)r>   x_scalex_zpr  r  r  r  rP  r  r  r   o_inv_scaleo_zero_pointfp32_outputr  r  r  r8   r8   r9   qconvolution_unary  s*    z6register_onednn_fusion_ops.<locals>.qconvolution_unary)r>   accumr  r  r  r  c                 S   s<   t tj| |||||||||	|
|||||||||||S rM   )r+   r   r   ZQConvPointWiseBinaryPT2E)r>   r  r  r  Zaccum_scaleZaccum_zpr  r  r  r  rP  r  r  r   r  r  r  r  r   r  r  Zunary_algorithmmr8   r8   r9   qconvolution_binaryH  s4    z7register_onednn_fusion_ops.<locals>.qconvolution_binaryc                 S   s*   t tj| |||||||||	|
||S rM   )r+   r   r   ZQLinearPointwisePT2E)r>   r  r  r  r  r  r  r  r  r  r  r  r  r8   r8   r9   qlinear_unary~  s"    z1register_onednn_fusion_ops.<locals>.qlinear_unary)r>   packed_worig_wr   c                 S   s.   t tj| |||}|d k	r*t||}|S rM   )r+   r   r   ZMKLPackedLinearrE   )r>   r  r  r   Z
batch_sizerT  r8   r8   r9   mkl_packed_linear  s    
z5register_onednn_fusion_ops.<locals>.mkl_packed_linear)rF   Z_CZ_has_mkldnnr2   ZmkldnnZ_convolution_pointwiseZ_convolution_pointwise_Z _convolution_transpose_pointwiseZ_linear_pointwiserZ  r  defaultZonednnZqconv2d_pointwiser   r+   binaryrh   r   r]   Zqlinear_pointwiseZhas_mklry   ZmklZ_mkl_linearr;   )Zcpu_needs_realized_inputsr  r  r  r  r  r  r  r  r  r  r  r8   r8   r9   register_onednn_fusion_ops`  s    
	!!  
()
 3!

r  c                    s   |rt    fdd}|S )Nc                     s   t tjtjj f| |S rM   )r  r  r+   r   r   FallbackKernelru   r   kernelr8   r9   handler  s     z!fallback_handler.<locals>.handler)r   rE   )r  add_to_fallback_setr  r8   r  r9   r\    s    
r\  c                   C   s   t d d S )NzjTorchinductor does not support code generation for complex operators. Performance may be worse than eager.)warningswarnr8   r8   r8   r9   _warn_complex_not_supported  s    r  r   c                 C   s   |   rt  dS dS )z0Do not support reading or writing to this tensorTF)Z
is_complexr  r  r8   r8   r9   unsupported_input_tensor  s    r  c                 C   s   t | rdS | jotjS )z2Do not support writing tensor but can read from itT)r  Zis_cpur   Zdisable_cpp_codegenr  r8   r8   r9   unsupported_output_tensor  s    r  )r   c                 C   sd   | j tjjkrdS | j tjjkr$dS dd }t| j| jfd D ]}||ddr@ dS q@|| ddS )NFc                 S   sn   t | tjjsdS d| jkr dS t| jd d D ]6}t |tjjsFq2|rZt|rh dS q2t	|r2 dS q2dS )NFr  r   T)
r@   rF   fxNodemetar   _subclasses
FakeTensorr  r  )r   	is_outputr  r8   r8   r9   check_skip_condition  s    
zCfallback_node_due_to_unsupported_type.<locals>.check_skip_conditionr   )r  T)r   rZ  view_as_complexr  lift_fresh_copyr   ru   r   )r   Zallow_cpu_inputsr  rs   r8   r8   r9   %fallback_node_due_to_unsupported_type  s    r  c                 C   s   | t kstd|  t| gr`|r`ttdr`tjjj	rPdtjj_	t
d td|  dt|  |d k	rzt| | t| d dt| S )Nz.both a fallback and a decomp for same kernel: CIFzmA make_fallback error occured in suppress_errors config, and suppress_errors is being disabled to surface it.zmake_fallback(a.  ): a decomposition exists, we should switch to it. To fix this error, either add a decomposition to core_aten_decompositions (preferred) or inductor_decompositions, and delete the corresponding `make_fallback` line. Get help from the inductor team if unsure, don't pick arbitrarily to unblock yourself.rj   )r!   r_   r"   rh   osgetenvrF   Z_dynamor   Zsuppress_errorslogwarningr;   rP   r   r\  )r  Zlayout_constraintr  r8   r8   r9   make_fallback  s"    



r  c                 C   s$   d}| D ]}|| }qt |tjdS )z
    TorchInductor offset calculation differs from PyTorch eager offset
    calculation for random ops (tl.rand vs torch.rand). In future, we should
    strive for same impl for tl.rand and torch.rand.
    r   r[   tensorrF   int64)r.  Znumelr  r8   r8   r9   philox_rand_offset  s    
r  c           	         sd   t | | t j|  | |  fdd}tj| |t| d}t	| }||fS )Nc                    sV   t g tj}t g tj}t t | tj|}t ||}t | S rM   )r2   r   rF   int32rE   
index_exprrand)r   Zseed_index_exprZoffset_index_exprZrand_index_exprrT  r\   Zoffset_loader
random_posseed_loaderr8   r9   r   8  s     zphilox_rand.<locals>.inner_fnr   )
r   rO  FlexibleLayoutcontiguous_stridesmake_indexerr   r(   r   rA   r  )	rI  seedr]  rP  r   r\   r   Zrandom_values_nodeZoffset_noder8   r  r9   philox_rand+  s"    
r  c              	   C   s0   t jr$ttjtjtj	| ||S t
dd S )Nz&should be handled in replace_random.py)r   fallback_randomr  r  r+   r   r   r  rZ  native_dropoutr_   )r>   pr  r8   r8   r9   r  R  s     r  c                 G   s<   t js |  tdks td|   tj| f|  | S NcpuzTthis should be handled in decomps unless config.fallback_random or the device is CPU)	r   r  r   rF   r   r_   r   r   ZInplaceBernoulliFallbackr>   ru   r8   r8   r9   
bernoulli_\  s    r  c                 G   s2   t js |  tdks tdtt| f| S r  )r   r  r   rF   r   r_   r  r   r  r8   r8   r9   bernoulli_pf  s    r  c                 C   s
   t  d S rM   r_   rv   r8   r8   r9   _foobaro  s    r  c                 C   s   t d d S )Nz1using triton random, expect difference from eager)r  info)saltr8   r8   r9   _warn_triton_randomt  s    r  c                   C   s   t tjj d S rM   )r  r3   r   Zcreation_timer8   r8   r8   r9   warn_triton_randomy  s    r  c                  O   s,   t js|dd d k	r t| |S tdd S N	generatorz-should have been handled in replace_random.py)r   r  getfallback_randr_   r  r8   r8   r9   r    s    
r  c                  O   s,   t js|dd d k	r t| |S tdd S r  )r   r  r  fallback_randnr_   r  r8   r8   r9   randn  s    
r  c                 C   s   t |}t j| |S rM   )r   get_stride_orderExternKernelrequire_stride_order)Zinput_tensorrP  stride_orderr8   r8   r9   inductor_force_stride_order  s    
r  r   c                 C   s   t dd S )Nz.should be handled in fuse_seed_creation_pass()r  r  r8   r8   r9   inductor_seed  s    r  c                 C   s   t   tt| t|S rM   )r  r+   r   r   ZRandomSeedsr/   )countr   r8   r8   r9   inductor_seeds  s    r  c                    s(    fdd}t j  |g dS )Nc                    s   t   S rM   )r2   Z	load_seedget_namer  r   seedsr8   r9   r     s    z&inductor_lookup_seed.<locals>.inner_fnr   )r(   r   r   rb   )r  r   r   r8   r  r9   inductor_lookup_seed  s    r  r]  )rI  r  r  r]  c                   s   t jr
t dkst| } tj}| }tj||| tj	| |d
 |  fdd}tj|||| d}|  |S )N)r  r  r  c                    s"   t t g t| tjS rM   )rJ   r2   r  rF   r  r   r  r  r  r8   r9   r     s    z!inductor_random.<locals>.inner_fnr   )r   r  r_   rF   float32r   r   rO  r  r  r  r   r(   r   r   )rI  r  r  r]  r\   r   r   rT  r8   r  r9   inductor_random  s,    
   
 
r  )lowhighrI  r  r]  c                   sl   t jr
t|}tj}| }tj|||tj	||d
 |  fdd}tj||||dS )Nr  c                    s"   t g t | tj S rM   )r2   Z	randint64r  rF   r  r   r  r  r  r  r8   r9   r     s    z"inductor_randint.<locals>.inner_fnr   )r   r  r_   rF   r  r   r   rO  r  r  r  r   r(   r   )r  r  rI  r  r]  r\   r   r   r8   r  r9   inductor_randint  s&    
   
 
r  	out_int32right)r   
boundariesr  r  c                   s   t   dkstt| r$t s>ttjdd|  |dS      d   }| 	 }|  |rtt
jnt
j fdd}tj|||  dS )	Nr   F)r  r   r   c                    s"   | }t |  }|S rM   )r2   	bucketizer  )r   r  r   r  Zboundaries_sizeZindex_dtypeZinput_loaderr  r8   r9   r     s    zbucketize.<locals>.inner_fnr   )rm   rn   r_   r%   r\  rZ  r  r   r   r   rF   r  r  r(   r   )r   r  r  r  Zboundaries_loaderr   r   r8   r  r9   r    s*       r  c                 O   s$   t tjdd ||f\}}||fS )Nc                 S   s   t j| S rM   )r   r  Zrequire_stride1r  r8   r8   r9   ra    rb  zrequire_dense.<locals>.<lambda>r  Ztree_map_onlyr   IRNoderv   ru   r   r8   r8   r9   require_dense  s      r	  c                 O   s$   t tjdd ||f\}}||fS )Nc                 S   s   t j| S rM   )r   r  require_contiguousr  r8   r8   r9   ra     rb  z$require_contiguous.<locals>.<lambda>r  r  r8   r8   r9   r
    s      r
  c                 O   s$   t tjdd ||f\}}||fS )Nc                 S   s   t j| S rM   )r   r  rY  r  r8   r8   r9   ra  '  rb  z'require_channels_last.<locals>.<lambda>r  r  r8   r8   r9   rY  %  s      rY  c                    sF   dd  t  fddt|jD } fdd| D }||fS )Nc                 S   s2   t | tjr.t|jd  }tj| |S | S )Nr  )r@   r   r  r  r  rP  r  r  )rs   fx_argr  r8   r8   r9   apply_constraint-  s    z1constrain_to_fx_strides.<locals>.apply_constraintc                 3   s   | ]\}} ||V  qd S rM   r8   )r=   rs   r  )r  r8   r9   r   3  s    z*constrain_to_fx_strides.<locals>.<genexpr>c                    s"   i | ]\}}| |j | qS r8   )r   )r=   kr/  r  fx_noder8   r9   r   6  s      z+constrain_to_fx_strides.<locals>.<dictcomp>)rB   r   ru   r   )r  ru   r   r8   r  r9   constrain_to_fx_strides,  s    
r  ztorchvision::roi_align)r  c                 C   sn   |}|   |  kr"t||   }|  | kr@t||  }|  | krft||  }t|S t|S rM   )r   r  rb   r   rn   r   r   )selfrk  non_blockingr>   r   r8   r8   r9   r     s    r   )memory_formatc                C   s&   t j|  |  |  t|  dS Nr   )r(   r   r   rb   r   rA   rn   )r>   r  r8   r8   r9   r     s    
r   r  c                   s(    fdd}t jt| || gdS )Nc                    s   t j| d    dS )Nr   r[   r2   r  r   r\   rA  rC  r8   r9   rK     s    ziota.<locals>.fnr   )r(   r   r/   )lengthrA  rC  r\   r   Zrequires_gradrK   r8   r  r9   iota  s    
r  )rq   r   c                    s   |   |  kst|  t|  d tjjt	drN| 
    tjjd tjj| 
    tt| | 
 }|  fdd}tj|  |   |t| 
 dS )Nr   c              	      s6   t t t |   tjt tj| | S rM   )r2   r  eqr  rF   r  r0  rq   r   
src_loaderr9  r8   r9   r   %  s    z select_scatter.<locals>.inner_fnr   )rb   r_   r   r?  r3   r   r   r  rc   r@  rn   rt  ru  r   r-  r(   r   r   rA   )r>   rk  rq   r   r   r8   r  r9   select_scatter  s     

r  c                    s(    |  kst t d    d k	rZtjjt	
drZ d k	rtjjt	
dr d krdd kstjj   rt }tt	 t	| < t||}|  fdd}tj   |t dS )Nr   c              
      s6  dkr kr dkr | S t |  tj}t|  t|    < g }dkr~|t |t t	tj kr|t 
|t t	tj dkr|t t t|   dtjt dtjj |sttt j|}t | fddtrdnd}t ||| S )Nr   r   c                      s    S rM   r8   r8   )src_idxr  r8   r9   ra  p  rb  z1slice_scatter.<locals>.inner_fn.<locals>.<lambda>        )r2   r  rF   r  rA   r   ry   gerc   r   ltr  r   constantr_   r   r  and_maskedrg   r  )r0  Zidx_dimmaskZsrc_valrq   rD  rB  r  rA  rC  r>   r9  )r  r9   r   K  sT     zslice_scatter.<locals>.inner_fnr   )rb   r_   r   r?  rn   r3   r   r   r  rc   r@  Zstatically_known_leqrA   r   r   r(   r   r   )r>   rk  rq   rA  rB  rC  Zsrc_sizer   r8   r&  r9   slice_scatter7  s.     
.
r'  c                 C   s*   t | ttfr&t| dkr&t| d S | S r   )r@   rA   rB   rm   _unwraprf   r8   r8   r9   r(    s    r(  r\   r   rX  
pin_memoryc                   s  t |d tjfkd|  t | d tt tr@p<tjnpJt g }t tj	rl fdd}nt t
tfr fdd}nft dkst d t
tfrt dkr|tt   fdd}ntjtj |d	S tjt|||d
S )Nlayout=r*  c                    s   t  S rM   r  r   r   r\   r8   r9   r     s    ztensor.<locals>.inner_fnc                    s   t  S rM   r2   r"  r   r,  r8   r9   r     s    r   rV   c                    s8    fdd t dkr*tdS  dt S )Nc              	      sr   | |k st ||  dkr(t|  S ||  d |  }tttd tjt|tj | | ||S )Nr   r   r   )r_   r2   r"  r  r!  r  rF   r  )rA  rB  mid)binary_searchr   r\   r   r8   r9   r/    s    z/tensor.<locals>.inner_fn.<locals>.binary_searchr   )rm   r2   r"  r   r,  )r/  r   r9   r     s    r5  r   )r:   rF   stridedr@   r(  r]   r  get_default_dtyperc   r   r   rm   ry   r   r3   r   Zadd_tensor_constantr  r(   r   r/   )r   r\   r   rX  r*  r   r   r8   r,  r9   r    s,    *r  c                 C   s@   t | tr2|d k	rt| |} |d k	r.t| |} | S t| ||dS )Nr5  )r@   r+   r   r  r  )r   r\   r   r8   r8   r9   	as_tensor  s    


r2  c                 C   s   t | tjdS )Nr[   r  r   r8   r8   r9   long_tensor  s    r4  c                 C   s   t  S rM   )r   ZDynamicScalarr3  r8   r8   r9   _local_scalar_dense  s    r5  c                    s   | t | ttfs"tdr"jt ttfr@ fdd}nDt tjr\ fdd}n(t dkspt	
 fdd}tj| |t|dS )Nr   c                    s   t  S rM   r-  r   r\   r   r8   r9   r     s    z_full.<locals>.inner_fnc                    s   t  S rM   r  r   r6  r8   r9   r     s    r   c                    s    g S rM   r8   r   )value_loaderr8   r9   r     s    r   )r@   r]   r   rl   r   rc   r   rm   rn   r_   r   r(   r   rA   )
fill_valuer   r\   rI  r   r8   )r\   r   r7  r9   _full  s     r9  c                 K   s   t t|| f|S rM   create_tensor_liketensor_constructor)r>   r8  r   r8   r8   r9   r#    s    r#  c                    s    d d d d dd d fdd
}|S )NF)namesr\   r   rX  r*  r  c                    s   t | d kd t |d tjfkd|  t | d t|}|pFt }t|dkrvt|d tttj	frvt|d }dd |D }t
 |||S )Nnamed tensorsr+  r*  r   r   c                 S   s   g | ]}t |qS r8   rM  rN  r8   r8   r9   r?   
	  s     z5tensor_constructor.<locals>.inner.<locals>.<listcomp>)r:   rF   r0  r/   r1  rm   r@   rA   rB   Sizer9  )r=  r\   r   rX  r*  r  rI  r8  r8   r9   r     s    	"z!tensor_constructor.<locals>.innerr8   )r8  r   r8   r@  r9   r<    s    r<  )r=  r\   rX  r   r*  r  c                 G   sX   t | d kd t|}t|dkrDt|d tttjfrDt|d }t|d ||||dS )Nr>  r   r   r\   rX  r   r*  )	r:   r/   rm   r@   rA   rB   rF   r?  empty_strided)r=  r\   rX  r   r*  r  rI  r8   r8   r9   r:  	  s    
"     r:  c                    s   dddddd fdd
}|S )zZ
    Shim to convert X_like(...) into X(...).  For example zeros_like() into zeros().
    NF)r\   r   rX  r*  r  c                   sj   t | d t |d tjfkd|  |d kr8|  }nt|}|pJ|  }t|  } |||||dS )Nr*  r+  r)  )r:   rF   r0  rb   r`   r   rA   rn   )r>   r\   r   rX  r*  r  rI  creation_fnr8   r9   _constant_like(	  s    
    z*create_tensor_like.<locals>._constant_liker8   )rD  rE  r8   rC  r9   r;  #	  s        r;  c                 C   s   t t| S rM   r:  r@  r8   r8   r9   r  :	  s    r  c                    s   d d d d d fdd
}|S )NrA  c                   sp   t |ttfstt| d t|d tjfkd|  t|pF|  }|pR| 	 }dd |D }t
 |||S )Nr*  r+  c                 S   s   g | ]}t |qS r8   )rc   r   rN  r8   r8   r9   r?   L	  s     z7new_constant.<locals>._new_constant.<locals>.<listcomp>)r@   rA   rB   r_   r:   rF   r0  r`   rb   r   r9  r>   rI  r\   rX  r   r*  r@  r8   r9   _new_constantD	  s    z#new_constant.<locals>._new_constantr8   )r8  rG  r8   r@  r9   new_constantC	  s       rH  rA  c                C   s4   |d kr|   }|d kr |  }t|d ||||dS NrA  rb   r   rB  rF  r8   r8   r9   	new_emptyR	  s         rK  c                C   s   t | ttfstt |tttd fs*tt| d t|d tjfkd|  t|p^t	 }|pnt
dj}td||| d}|  |jj}dgt|  |j_t |tjstdd | D } |rdd |D n
tj| }tj||| |d	|_|S )
Nr*  r+  r  r   )r8  r   r\   rI  c                 S   s   g | ]}t |qS r8   rM  rN  r8   r8   r9   r?   m	  s     z!empty_strided.<locals>.<listcomp>c                 S   s   g | ]}t |qS r8   rM  rN  r8   r8   r9   r?   o	  s     )r   r\   rI  rP  )r@   rA   rB   r_   r   r:   rF   r0  r`   r1  r  r   r9  r   r   rm   r   r   ComputedBufferr  r  rO  rX  )rI  rP  r\   rX  r   r*  Z	pointwisebufferr8   r8   r9   rB  ]	  s.    
rB  c                C   s4   |d kr|   }|d kr |  }t||||||dS rI  rJ  )r>   rI  rP  r\   rX  r   r*  r8   r8   r9   new_empty_strided|	  s         rN  c                 C   s2   dd |D }t tt||jd}tj| |S )Nc                 S   s   g | ]}t jj|qS r8   )r3   r   r   r+  rN  r8   r8   r9   r?   	  s     z copy_strided.<locals>.<listcomp>)key)sortedr   rm   __getitem__r   r  r  )r>   rP  r  r8   r8   r9   copy_strided	  s    rR  c                 K   s6   | d}|d k	r|n
tt||d< t|| f|S )Nr\   )r  r   r   r<  )rI  r8  r   r\   r8   r8   r9   full	  s    
rS  c                    s   t | tst| tjks t|  tdk}t|  | | 	 |	  fdd}t
j|  |  || dS )Nr   c                    s4   t | } t| dkr,t|   |  < | S r   )rA   rm   r2   indirect_indexingr  rq   index_loaderrI  r9  r8   r9   rK   	  s    zgather.<locals>.fnr   )r@   r+   r_   rb   rF   r  rn   rm   r?  r   r(   r   r   )r>   rq   r   Zsparse_gradr]  rK   r8   rU  r9   gather	  s    rW  c                    s   |rt t| tst t|ts$t dt| ks8t |  |  t| |  | dd   fdd}tj	| 
 |  |dS )Nr]   r   c                    sZ   t | t ks"t|  d  | d  }t|d g| d   }|S )Nz != r   )rm   r_   r2   rT  )r0  Z	var_indexZ
weight_idxindices_loaderZindices_ndimr;  Zweight_loaderZweight_sizer8   r9   rK   	  s    "
zembedding.<locals>.fnr   )r_   r@   r+   strrb   r   rm   rn   r(   r   r   )r~  r   Zpadding_idxZscale_grad_by_freqsparserK   r8   rX  r9   	embedding	  s     r\  c           
         sB  t dd  D s*tddd  D  tdd  D rDtddd t D }t|d	ksjtd
d gt  }t|t fdd|D  D ]0\}}| |krtd|||< t|	 }qd	}t
|}|r|d d kr|  q|r|d	 d kr|d	 |d7 }qtdd |D r0td|| }	|||	fS )Nc                 s   s2   | ]*}|d k	r|  tjtjtjtjfkV  qd S rM   )rb   rF   r  r  rh   rV  r   r8   r8   r9   r   	  s   z.check_and_broadcast_indices.<locals>.<genexpr>z)indices must be int64, byte or bool. Got c                 S   s   g | ]}|d k	r|  qS rM   r   r   r8   r8   r9   r?   	  s      z/check_and_broadcast_indices.<locals>.<listcomp>c                 s   s*   | ]"}|d k	r|  tjtjfkV  qd S rM   )rb   rF   rh   rV  r   r8   r8   r9   r   	  s     zFallback for bool indicesc                 S   s   g | ]\}}t |tr|qS r8   r|   r}   r8   r8   r9   r?   	  s     
 r   z"requires at least 1 non-None indexc                    s   g | ]} | qS r8   r8   r   r   r8   r9   r?   	  s     z.Fallback when indices is on a different devicer   r   c                 s   s   | ]}|d kV  qd S rM   r8   r   r8   r8   r9   r   	  s     z.Fallback when None is in the middle of indices)r   r_   r   r5   r   rm   r   r   r   rn   rA   pop)
r   r   Z
valid_idxsZnew_indicesr~   r>   Z
output_dimstart_offsettmp
end_offsetr8   r]  r9   check_and_broadcast_indices	  s8    
$


rb  c                    s   t ttfst|  t|  \dd D }dd D t|d |  fddtt	D dkrdkrt
dd  t	 d   fdd}tj|  |  |d	S )
Nc                 S   s   g | ]}|d k	r|  qS rM   r  r   r8   r8   r9   r?   	  s      zindex_impl.<locals>.<listcomp>c                 S   s   g | ]}|d k	r|  qS rM   r   r   r8   r8   r9   r?   	  s      r   c                    s    g | ]} | d k	r| qS rM   r8   r   r   ro  r8   r9   r?   
  s      z0index is out of bounds for dimension with size 0c                    sh   t  t kstt t ks(t fddtD } d  | d  }|S )Nc                    s,   g | ]$\}}t j| | d qS r   r2   rT  r=   loaderrI  )r   ra  r0  r_  r8   r9   r?   
  s     z*index_impl.<locals>.fn.<locals>.<listcomp>rm   r_   r   )r0  	new_index)r   ra  indexed_sizeindices_loadersoutput_sizer_  r9  r  r9   rK   
  s    zindex_impl.<locals>.fnr   )r@   rA   rB   r_   r   rb  r   rn   r   rm   
IndexErrorr(   r   rb   )r>   r   r   indices_sizesrK   r8   )	r   ra  rk  r   rl  rm  r_  r9  ro  r9   
index_impl	  s0     

rp  c                 C   sB   zt | |ddW S  tk
r<   |   ttj| | Y S X d S NTre  )rp  r5   r   r\  rZ  r   r>   r   r8   r8   r9   r   !
  s
    r   c                 C   s   t | |ddS NFre  )rp  rr  r8   r8   r9   _unsafe_index+
  s    rt  c                 C   s   t t| |||S rM   )
index_put_r   r>   r   r   
accumulater8   r8   r9   	index_put8
  s    rx  c                 C   s   t t| |||ddS rs  )index_put_impl_r   rv  r8   r8   r9   _unsafe_index_put=
  s    rz  c                 C   sB   |  |   krt||   }|r,t| |}t| t|d || S r   )r   r  rE   rj  r  )r  r   r   rw  r8   r8   r9   index_put_as_masked_fillB
  s
    
r{  c                 C   s4   t |r |dkst r dtj_t| ||| | S ra   )r%   rF   $are_deterministic_algorithms_enabledr3   r   Zdisable_cudagraphsr   ZIndexPutFallbackr  r   r   rw  r8   r8   r9   index_put_fallbackJ
  s    r~  c                 C   s   t | |||ddS rq  )ry  r}  r8   r8   r9   ru  S
  s    ru  c                    s  |  dkrrtdkrrd  tjtjhkrrd }tt| t|  D ]}t|d}qRt	| |g||S t
 rt| ||S D ]2}|d k	r| tjtjhkrt| ||  S q|  t}|  tjtjhkr |dkrt| dg} t| ||} |dkrt| g } | S t||  }zt|  \W n$ tk
rj   t| || Y S X dd D }	dd D t| tst|   |dkrt| dg} t|	d }
d  |
t|	 d  fddttD t|} fdd	}tj|  |  | ||rDd
nd d}td t| |}tj||_ |dkrt| g } | S )Nr   r   r   c                 S   s   g | ]}|d k	r|  qS rM   r  r   r8   r8   r9   r?   
  s      z#index_put_impl_.<locals>.<listcomp>c                 S   s   g | ]}|d k	r|  qS rM   r   r   r8   r8   r9   r?   
  s      c                    s    g | ]} | d k	r| qS rM   r8   r   rc  r8   r9   r?   
  s      c                    sP   t  t kst fddtD } d  | d  }|S )Nc                    s,   g | ]$\}}t j| | d qS rd  rf  rg  )r   ra  r   r_  r8   r9   r?   
  s     z;index_put_impl_.<locals>.output_indexer.<locals>.<listcomp>ri  )r   rj  )r   ra  expected_vals_sizerk  rl  r_  r   r9   output_indexer
  s    z'index_put_impl_.<locals>.output_indexer
atomic_addr   r\   r   r   r  Zscatter_mode)!	get_numelrm   rb   rF   rh   rV  r   rn   r-  r{  r|  r~  r  r  r   rb  r   r5   r@   r+   r_   r   rA   r   r   Scatterr   rL  MutationLayoutr3   r   register_bufferr  )r  r   r   rw  r   r%  rv   r   Zx_ndimro  rm  r  scatterrM  r8   )r   ra  r  rk  r   rl  r_  ro  r9   ry  X
  s~    



 




ry  c                 C   s$   t | }t||||}t|| |S rM   )r   rR  copy_)r  rk  rI  rP  rQ  r   Zoutput_viewr8   r8   r9   as_strided_scatter
  s    
r  )rq   c                 K   s   t t| |||f|S rM   )scatter_r   )r>   rq   r   rk  r   r8   r8   r9   r  
  s    r  r  include_self)rq   r  r  c             	   C   s`   | dkrdnd}|d |hks@||kr8|  tjtjhks@t r\tj| ||||||d |S d S )Naten.scatter_rE   sumr  )rb   rF   rh   r  r|  r   ZScatterFallback)rK   r  rq   r   rk  r  r  Z	reduce_tyr8   r8   r9   scatter_fallback
  s(    

      r  r  )rq   r  c                C   sR   |dkst td| ||||d}|r(|S |dkr6d}n|dkrBd}t| ||||S )N>   rE   Nmultiplyr  r  rE   r  r  prod)r_   r  scatter_reduce_)r  rq   r   rk  r  fallback_resultr8   r8   r9   r  
  s          r  c                 C   s   t t| |||S rM   )scatter_add_r   r>   rq   r   rk  r8   r8   r9   scatter_add
  s    r  c                 C   s   t t| |||dS )Nr  r  r   r  r8   r8   r9   r  
  s    r  c                 K   s   t t| ||||f|S rM   r  )r>   rq   r   rk  reduction_typer   r8   r8   r9   scatter_reduce
  s    r  )r  )rq   r  c             	      s  |dkst td |||d}|r*|S tts8t dt| ksLt t }|dkrltdgttrt dkrtdgt|trt| dkrt|dg}t	  
  | ttr nd  fdd}fd	d
}	dd }
|sbtj  fdd| |d d}td t|}tj||_tj  |	| ||
|d}td t|}tj||_|dkrtg S )N>   Namaxr  r  meanaminzaten.scatter_reduce_r  r]   r   r   c                    s@     }t|}t| }t| |dkr.dn|  | < |S rn  )rn   rm   rA   r2   rT  )r0  r.  rw  Zindirect_idx)rq   rV  r  r8   r9   r     s     z'scatter_reduce_.<locals>.output_indexerc                    s    r| S t   S d S rM   r2   r"  rb   r  )r  rk  r  r8   r9   rK   *  s    zscatter_reduce_.<locals>.fnc                 S   s    | dkrdS | d kst d S d S )Nr  r  r  r  r8   r8   r9   backend_reduce_str1  s    z+scatter_reduce_.<locals>.backend_reduce_strc                    s   t d  S r   r  r   )r  r8   r9   ra  >  rb  z!scatter_reduce_.<locals>.<lambda>r  )r_   r  r@   r+   rZ  rb   rm   rn   r  r?  r   r   r   r  r   rL  r  r3   r   r  r  )r  rq   r   rk  r  r  r  rw  r  rK   r  Zzero_outrM  r  r8   )rq   rV  r  rk  r  r9   r  
  sx    





r  .)scales_xnc           	         s   |    |  |   d   |  d   }dd  D  t|ksRt|}dd t |D tD ]\}}|rr||< qrdd  fdd}tj| 	 | 
 |||dS )	Nc                 S   s   g | ]}t jj|qS r8   )r3   r   r   rL  r   r8   r8   r9   r?   h  s     z&upsample_nearestnd.<locals>.<listcomp>c                 S   s   g | ]\}}|| qS r8   r8   )r=   r~   or8   r8   r9   r?   m  s     c                 S   sB   t | tj} t | t |tj} t | tj} t j| |ddS rs  )	r2   r  rF   r  r   r"  r   r  rT  )r>   scalerI  r8   r8   r9   scale_fnr  s    z$upsample_nearestnd.<locals>.scale_fnc                    s>   |  d  }| d   }|fddt | D S )Nc                    s   g | ]\}}} |||qS r8   r8   )r=   r~   r  rI  )r  r8   r9   r?   |  s     z2upsample_nearestnd.<locals>.fn.<locals>.<listcomp>)r   )r0  r>   r   Zi_sizesr  r  scalesr9  r8   r9   rK   x  s
    zupsample_nearestnd.<locals>.fnr   )realize_hintr   rn   rm   r_   r   r   r(   r   r   rb   )	r>   rm  r  r  batchZo_sizesr~   r  rK   r8   r  r9   upsample_nearestnda  s&    
r  )r  c                 C   s   t | ||fddS )Nr   r  r  )r>   rm  r  r8   r8   r9   upsample_nearest1d  s    r  )scales_hscales_wc                 C   s   t | |||fddS )Nr   r  r  )r>   rm  r  r  r8   r8   r9   upsample_nearest2d  s    r  )scales_dr  r  c                 C   s   t | ||||fddS )NrQ   r  r  )r>   rm  r  r  r  r8   r8   r9   upsample_nearest3d  s    r  c                    s   t  fdd|D S )Nc                 3   s   | ]}t | V  qd S rM   r-  r   r[   r8   r9   r     s     z$_create_constants.<locals>.<genexpr>rB   )r\   ru   r8   r[   r9   _create_constants  s    r  )align_cornersr  r  c              
      s   |    |  |  \}}	
|\}}tjj		tjj

dd ddd}	dd dd d	d
 fddfdd|		| ||	
| |dd  	
f
dd}
tj| 	 | 
 |
||t|t|gdS )Nc                 S   s   | t t jjkrt jS t jS rM   )rF   r   r  re  r  )maxvalr8   r8   r9   get_int_dtype  s    z1upsample_bicubic2d_default.<locals>.get_int_dtypec                 S   sD   |r |dkr| d |d  S dS |d k	r8|dkr8d| S | | S d S r6  r8   )Zin_sizerx  r  r  r8   r8   r9   compute_scale  s    z1upsample_bicubic2d_default.<locals>.compute_scalec                 S   sN   t |tj}t | tj} |r,t | |S t dtj}| ||  | S d S )N      ?)r2   r  rF   r  r"  r   )r  Z	dst_indexr  Zdst_index_ieZhalfr8   r8   r9   compute_source_index  s    z8upsample_bicubic2d_default.<locals>.compute_source_indexc                 S   s8   t |d |d dtjd\}}}||  | |  |  | S )Nr   rQ   r   r[   r  rF   r  )r>   AZ_Ap2Z_Ap3_1r8   r8   r9   cubic_convolution1  s     z6upsample_bicubic2d_default.<locals>.cubic_convolution1c                 S   sD   t |d| d| d| tjd\}}}}||  | |  | |  | S )NrR   rS   rV   r[   r  )r>   r  Z_AZ_4AZ_5AZ_8Ar8   r8   r9   cubic_convolution2  s        z6upsample_bicubic2d_default.<locals>.cubic_convolution2c                    sb   d}t dtj}t | ||} | |}t || } ||}t |||}||||fS )Ng            ?)r2   r"  rF   r  rE   sub)r   r  r  Zc0c1Zx2c2c3)r  r  r8   r9   get_cubic_upsample_coefficients  s    

zCupsample_bicubic2d_default.<locals>.get_cubic_upsample_coefficientsc                    sH    |}| d |d  | d |d   | d |d   | d |d   S )Nr   r   r   rQ   r8   )Zxsr   cs)r  r8   r9   cubic_interp1d  s    z2upsample_bicubic2d_default.<locals>.cubic_interp1dc                 S   s   t |t || S rM   )r2   maximumminimum)r/  rf  re  r8   r8   r9   clamp  s    z)upsample_bicubic2d_default.<locals>.clampc           
         s   | \ }}
|}t |}t ||
|}t |}t ||} 	fddt |d t |d tfdddD }tfdddD fdd	tfd
d|D }	|	|S )Nc                    sr   t dtj}t d tj}t d tj}t j| ||dd}t j|||dd} ||gS )Nr   r   Fre  )r2   r"  rF   r  rT  )fyr  Z_0ZiHm1ZiWm1iyix)cr  iHiWr  r9  r8   r9   load_bounded  s    z<upsample_bicubic2d_default.<locals>.fn.<locals>.load_boundedr   c                 3   s   | ]}t  |V  qd S rM   r2   rE   r=   Zofs)r  r8   r9   r     s     z9upsample_bicubic2d_default.<locals>.fn.<locals>.<genexpr>)r   r   r   r   c                 3   s   | ]}t  |V  qd S rM   r  r  )r  r8   r9   r     s     c                    s"   t  fddD }|S )Nc                 3   s   | ]} |V  qd S rM   r8   r<   )r  r3  r8   r9   r     s     zOupsample_bicubic2d_default.<locals>.fn.<locals>.get_x_interp.<locals>.<genexpr>r  )r3  Zcoeffs_x)r  ixs_ofsr  t_x)r3  r9   get_x_interp   s    z<upsample_bicubic2d_default.<locals>.fn.<locals>.get_x_interpc                 3   s   | ]} |V  qd S rM   r8   )r=   r3  )r  r8   r9   r     s     )r2   r(  r  r   rB   )
r0  ZoyoxZreal_xZin_xZreal_yZin_yZt_yZiys_ofsZcoeffs_y)
r  r  r  r  r  height_scaler  r  width_scaler9  )r  r  r  r  r  r  r  r  r9   rK     s    

	z&upsample_bicubic2d_default.<locals>.fnr   )N)r  r   rn   r3   r   r   rL  r(   r   r   rb   rc   r   )r>   rm  r  r  r  NCZoHZoWr  rK   r8   )r  r  r  r  r  r  r  r  r  r  r  r  r9  r9   upsample_bicubic2d_default  s.    
	 r  c              
      s   t |dkst|\}}|  |  ^ } tjj  tjjdd  fdd}tj	| 
 |  ||t  | t | fdS )NrR   c                 S   sj   |}t |d tj}t | tj} t | t |tj} t |t t |t | } t j| |ddS )Nr   Fre  )r2   r"  rF   r  r  r  absrT  )r>   rI  r]  Zsize_numr8   r8   r9   reflect  s     z!reflection_pad2d.<locals>.reflectc                    s4   | ^ }}}| }|}|||fS rM   r8   )r0  r   r>   r3  hleftr  topr  r9  r8   r9   rK   !  s    zreflection_pad2d.<locals>.fnr   )rm   r_   r   rn   r3   r   r   rL  r(   r   r   rb   rc   r   )r>   r  r  botr  rK   r8   r  r9   reflection_pad2d  s    &r  c                    s   t |dkst|\ | ^ }tjjd tjjd |  |  ^ }}} fdd}tj	| 
 |  |t| dS )NrR   r   c                    s  | ^  }} fdddd | |	  | 	|  }}d  | d 	 |  }}d  f}d	 
 f}t ||}	t |	fddd	dfd
d	}
|
|||d	f |
|||
 d f |
||df| |
|| d f| |
|||df|d	f |
|||df|
 d f |
||| d f|d	f |
||| d f|
 d f S )Nc                    s    | |fS rM   r8   r2  )r   grad_loaderr8   r9   load_from_output=  s    z?reflection_pad2d_backward.<locals>.fn.<locals>.load_from_outputc                 S   sP   | \}}}t |tj}t |tj}t |tj}t t ||t ||S rM   )r2   r  rF   r  r  r#  r   le)Zindex_ranger~   ZlbZubr8   r8   r9   index_range_condition@  s
    
zDreflection_pad2d_backward.<locals>.fn.<locals>.index_range_conditionr   r   c                      s
    S rM   r8   r8   )center_xcenter_yr  r8   r9   ra  \  rb  z7reflection_pad2d_backward.<locals>.fn.<locals>.<lambda>r  c                    s   |d |d k }t |tr"|r"d S |}|d k	rd|d |d k }t |trT|rTd S t||}t| fddd}t|d S )Nr   r   c                      s
    S rM   r8   r8   )r  out_xout_yr8   r9   ra  l  rb  zKreflection_pad2d_backward.<locals>.fn.<locals>.accumulate.<locals>.<lambda>r  )r@   rh   r2   r#  r$  rE   )r  r  Zindex_range1Zindex_range2Zupper_less_than_lower1r6   Zupper_less_than_lower2g)gradr  r  )r  r  r9   rw  ^  s    z9reflection_pad2d_backward.<locals>.fn.<locals>.accumulater   )N)r2   r#  r$  )r0  r>   r3  Ztop_reflect_xZleft_reflect_yZbot_reflect_xZright_reflect_yZrange_cxZrange_cyr6   rw  r  r  r  r  r  r  r  )r   r  r  r  r  r  r9   rK   :  s8    " ""   z%reflection_pad2d_backward.<locals>.fnr   )rm   r_   rn   r3   r   r   rL  r   r(   r   r   rb   rA   )grad_outputr>   r  rv   Zh_gradZw_gradrK   r8   r  r9   reflection_pad2d_backward/  s    B
r  c                    s:   |   |   fdd}tj|  |  |dS )Nc                    sF   t | } t| tkst D ]}| d | |  | |< q | S r  )rA   rm   r_   )r0  rq   r  r  r9  r8   r9   rh    s
    zrev.<locals>.loaderr   )r   rn   r(   r   r   rb   )r>   r  rh  r8   r  r9   rev  s    r  c              	      sv  t |d dksttdd |D r.t| S |  }tttt|d d d |dd d  t |t   g  D ]:\}}t|t	j
r|jrtjj|n|}||f qxt|d  }g t |d  D ].\\}}	}
|
 |t	|
| |	  qt |t |ks tt|   fddfdd	}|  tj|  |  ||d
S )Nr   r   c                 s   s   | ]}|d kV  qdS r   Nr8   )r=   r  r8   r8   r9   r     s     z"constant_pad_nd.<locals>.<genexpr>r   c                    s~   g }t  d  D ]>\}\}}}|dkr>|t|d |dkr|t|| qttj|}t| fddS )Nr   c                      s    S rM   r8   r8   )r   r9  r8   r9   ra    rb  z/constant_pad_nd.<locals>.mask.<locals>.<lambda>)	r   ry   range_mask_lowrange_mask_highr   r  r2   r#  r$  )r   r%  r0  r  r  r  )boundsr8  
mask_sizesr  r9  r   r9   r%    s    "zconstant_pad_nd.<locals>.maskc                    sZ   t | d  }t| d   D ]\}\}}|||  q"t|t| ksRt|S rM   )rA   r   ry   rm   r_   )r   rj  r0  r  r  )bounds_precompr%  r  r8   r9   	offset_fn  s
    z"constant_pad_nd.<locals>.offset_fnr   )rm   r_   r   r   rn   rA   r   r   r@   rc   r   r   r3   r   r   Zlookup_precomputed_sizery   r   r   rb   r   r(   r   r   )r>   r  r8  r  lr  Z	l_precomprm  r  r  rI  r  r8   )r  r  r8  r%  r  r  r9  r9   constant_pad_nd  s>    *


r  r~   r  c                 C   s&   t t | tjt t|tjS rM   )r2   r   r  rF   r  rc   r   r  r8   r8   r9   r    s    r  r~   r  c                 C   s    t t | tjt |tjS rM   )r2   r!  r  rF   r  r  r8   r8   r9   r    s    r  r~   r  r  c                 C   s   t t| |t| |S rM   )r2   r#  r  r  r  r8   r8   r9   
range_mask  s    r  r  c              	      sX     ^ } r$d ndr4d nd f	dd}|S )Nr   r   c                    sr   | ^  t t   t	  }rVt | 
fddS t | fddS )Nc                      s   t  fS rM   )constant_boundary_condition_2dr8   )ihiwpad_fill_valueprefixr>   r8   r9   ra    s   
z>constant_boundary_condition_2d.<locals>.load.<locals>.<lambda>c                      s    fS rM   r8   r8   )r  r  r  r9  r8   r9   ra    rb  )r2   r#  r  r$  )r   r%  	r8  r  r  r  Z	padding_hZ	padding_wr  r>   r9  )r  r  r  r9   r     s    z,constant_boundary_condition_2d.<locals>.loadrn   r   )r>   r8  r  r  rv   r   r8   r  r9   r     s    r   c                 C   s   t | d||   || d  || d  || }|rt | d||   || d  d|| d   || }tjj|d ||  |  ||  dkr|d8 }tjjd|||  |  ||   tjj|| dkrtjj|| d}n|}||fS )Nr   r   r   F)r   r3   r   r   r+  rt  r   )r>   r~   kernel_sizerP  r  	ceil_modeZx_outZx_altr8   r8   r9   pooling_size  s"    & * *$r	  c                    s  dkrddg|dkr ddg}s( t  d t dt dt |d}t| ts^tt dksnttdks~ttdkstt|dkstt|  dkst|   |  ^ }}t|d |\}}	td |\}
}d sd s|	s|r&t| t	dn| 
 t|||
g } d  d  }|dksltdd |D r~t|  ||S  fd	d
}tj|  |  tj|dd|d}tj|  tjtj|dd|d}||fS )Nr   r   r   rQ   rR   z-inf   c                 s   s   | ]}|d kV  qdS r  r8   rF  r8   r8   r9   r   ?  s     z*max_pool2d_with_indices.<locals>.<genexpr>c                    s   | ^ }}}d }d }t t d t d D ]\}}|d  | d  }|d  | d  }|||f}	|rt| | tj}
|d kr|
}ntt|	||
|}|d kr|	}q0t	|	|}q0|r|S |S d S rn  )
r   productr   r2   r  rF   r  r  gtr  )r0  return_indexr  bhbwr  Zmaxindexr  r  r  r   r  r  rP  r  r9  r8   r9   rK   E  s$    $z#max_pool2d_with_indices.<locals>.fnF)r  r   T)r0   r@   r+   r_   rm   rn   r  r	  r   r   r   rA   r    fallback_max_pool2d_with_indicesr(   r   r   rb   r   r   rF   r  )r>   r  rP  r  r  r  r  r  h_out
ceil_mode1w_out
ceil_mode2r;  window_sizerK   r1r2r8   r  r9   max_pool2d_with_indices  s`    



      r  c                    s  dkrddg|dkr ddg}s(t |ts6ttdksFttdksVttdksftt|dksvtt| dkst|   z|  }W n tk
r   d }Y nX t |trt |jjt	r|jj}	t
jd t
j|	 |	 |	 d|	d}
|
  |
 }n(z| }W n tk
r<   d }Y nX |d k	rV|d dkpj|d k	oj|d dk}tjptjptj}tdd |D s|r|st| ||||S |  | ^ }}
|  ^ }| |   t| }tfd	d
td d D tfdd
td d D 		 }|dkrlt| ||||S |  	
fdd}t	j|  |  ||dS )Nr   r   r   r
  )r   r\   rI  )r  rX  r   c                 s   s   | ]}|d kV  qdS r  r8   rF  r8   r8   r9   r     s     z3max_pool2d_with_indices_backward.<locals>.<genexpr>c              	      s8   g | ]0}t |d   t d | d   d    dqS r   r   re  r=   r  r  rP  r8   r9   r?     s   z4max_pool2d_with_indices_backward.<locals>.<listcomp>c              	      s8   g | ]0}t |d   t d| d   d    d qS r   r   r  r=   r  r  r8   r9   r?     s   r  c                    sZ  | ^ }}}t |
 | tj}|d  }|d  }t t|d  d  d tj}t t|d  d  d tj}t t|d d tj}t t|d d tj}t |t dtj}t |t dtj}t |t tj}t |t tj}d }	tD ]}
t	D ]}t 	|t |
tj}t 	|t |tj}|t j
t |t |t dtjd ddt j
t |t |t dtjd ddf}|} |}t ||}|	d krt ||t dtj}	n:t t t ||t |||}t |t 	|	||	}	q4q&|	d k	sVt|	S )Nr   r   Fre  r   r  )r2   r  rF   r  r   r  r"  r  r   rE   rT  r  r  r  r  r#  r!  r_   )r0  r  r  r  Z
index_testphstartpwstartphendpwendgradientph_pw_phpwZ
grad_indexZindex_actualZ	grad_partr   r%  r  h_window_sizerY  Zindices_sizer  r  pooled_heightpooled_widthrP  w_window_sizewidthr8   r9   rK     sl      
  

z,max_pool2d_with_indices_backward.<locals>.fnr   )r@   r+   r_   rm   rn   r  
get_strideAttributeErrorr   r(   r   rL  r  r   rb   Zdecide_layoutr   Zcoordinate_descent_tuningZmax_autotuneZmax_autotune_pointwiser   )fallback_max_pool2d_with_indices_backwardr   rA   re  r   r   )r  r>   r  rP  r  r  r  r   Z	gO_strider   Zx_bufferZx_strideZis_channels_lastZautotuner  heightrv   r;  r  rK   r8   r+  r9    max_pool2d_with_indices_backwardq  s    
	

        
        ;r5  c                    s(   |   ^ }}}|    fdd}|S )Nc              
      s   |\|\ |\}}t t t   tjt |tjt t  tjt |tj}t | fdddS )Nc                      s      fS rM   r8   r8   )h_start_indexr  r  r  w_start_indexr9  r8   r9   ra  %  rb  z3pad_adaptive_loader.<locals>.load.<locals>.<lambda>r  )r2   r#  r!  r  rF   r  r$  )r  Z
incrementsZstart_indicesZend_indicesh_end_indexw_end_indexr%  r9  )r6  r  r  r  r7  r9   r     s$    z!pad_adaptive_loader.<locals>.loadr  )r>   rv   r  r  r   r8   r:  r9   pad_adaptive_loader  s    r;  c                    s(   |\|\  fdd}|S )Nc                    s   | ^ }}}|} |}|}|}d }	t td td D ]<\}
}|||
|g||g||g}|	d kr||}	qLt||	}	qL|	S rn  r   r  r   r2   rE   )r0  rh  r  r  r  r6  r8  r7  r9  totalr  r  r  Zh_end_index_fnZh_start_index_fnkernel_maxesZw_end_index_fnZw_start_index_fnr8   r9   fn_sum0  s"    $z)_adaptive_pooling_idx_sum.<locals>.fn_sumr8   )r?  Zstart_index_fnsZend_index_fnsr@  r8   r>  r9   _adaptive_pooling_idx_sum,  s    rA  c                    s  t tstt|dkst   ^ }}}tjj	|}tjj	|}|\}}||krr||krrt
S |dks|dkr|||f}t|  dS || dkr|| dkr|| || g}t|S t|| d |}	t|| d |}
t|||g } }dd }dd }tj|||d	}tj|||d	}tj|||d	}tj|||d	}|	|
 }|d
kr~t|S t|	|
g||g||g tt fdd}tj |||d}|S )Nr   r   r5  r   c                 S   s   t | | |S rM   r   r   out_diminp_dimr8   r8   r9   start_indexk  s    z)_adaptive_avg_pool2d.<locals>.start_indexc                 S   s   t | d | | d |S r  rB  rC  r8   r8   r9   	end_indexn  s    z'_adaptive_avg_pool2d.<locals>.end_indexrD  rE  r  c                    s   t  | t | S rM   )r2   divr;  r  r@  ones_loaderr>   r8   r9   rK     s    z _adaptive_avg_pool2d.<locals>.fnr   )r@   r+   r_   rm   r  rn   r3   r   r   rL  r   r:  rb   r   
avg_pool2dr.   rA   r   r   fallback_adaptive_avg_pool2drA  r;  	ones_liker(   r   )r>   rm  r  Zh_inZw_inr  r  Zo_sizer  h_kernel_maxw_kernel_maxr;  r\   rF  rG  r6  r8  r7  r9  r  rK   rvr8   rJ  r9   _adaptive_avg_pool2dM  sT    


rR  c                    s"      ^ }}}tjj|}tjj|}|^ }}}	|| dkrr||	 dkrrt|| ||	 gddS t||}
t||	}dd fdd}tj	||d}tj	|||d}tj	|	|d}tj	||	|d}t
|
|g||g||g  fd	d
}tj  |t|d}|S )Nr   r   )divisor_overridec                 S   s   t | | |S rM   )r   rC  r8   r8   r9   rF    s    z0upsample_nearest2d_backward.<locals>.start_indexc                    s    | d ||S r  r8   rC  )rF  r8   r9   rG    s    z.upsample_nearest2d_backward.<locals>.end_indexrH  c                    s    | t S rM   )r;  r  )r@  r>   r8   r9   rK     s    z'upsample_nearest2d_backward.<locals>.fnr   )r  rn   r3   r   r   rL  rL  r.   r   r   rA  r(   r   r   rb   rA   )r>   rm  Z
input_sizer  r  r  Zinp_hZinp_wZout_hZout_wrO  rP  rG  r6  r8  r7  r9  rK   rQ  r8   )r@  rF  r>   r9   upsample_nearest2d_backward  s8    

rT  r8   c                    s  ssddgt dt dt dt| ts@ttdksPttdks`ttdksptt|  dkst|   |  ^ }}}	t|d|\}
}t|	d|\}}d sd s|s|rt| dd}n| 	 d}t
||
|g }|   d d  }|dkrBt| |||S fd	d
|r^|r|rnd| ndd d    fdd}n*tt| d|rnd fdd}tj|   ||d}|S )Nr   r   r
  r   r  TFr  c           	         s   | ^ }}}d }t t d t d D ]b\}}|d  | d  }|d  | d  }||||f}|d kr|}q,t||}q,|S rn  r<  )	r0  rh  r  r  r  r=  r  r  r  )r  r  rP  r8   r9   r@    s    $zavg_pool2d.<locals>.fn_sumr  c                    s   t | t  S rM   )r2   r   r"  r  )r\   r@  r  r9  r8   r9   rK     s    zavg_pool2d.<locals>.fnc                    s   t  |  | S rM   r2   rI  r  )r@  rK  r9  r8   r9   rK     s    r   )r0   r@   r+   r_   rm   rn   r  r	  r   r   rA   rb   fallback_avg_pool2drN  r(   r   r   )r>   r  rP  r  r  count_include_padrS  r  r  r  r  r  r  r  had_paddingr;  r  rK   rQ  r8   )r\   r@  r  rK  r  r  rP  r9  r9   rL    sj    







  rL  c                    s  d ksdkst ds s,ddgt| ts:t t|tsHt tdksXt tdksht tdksxt t| dkst |   | ^ }td|\}	}
td|\}}|  d pd p|
p||  ^ }	
t| }|	 }t
fddtd d D t
fddtd d D  }|d	krt| ||S fd
d  	
fdd}tj|  |||d}|S )Nr   zdivisor must be not zeror   r
  r   c              	      s8   g | ]0}t |d   t d | d   d    dqS r  r  r  r  r8   r9   r?   N  s   z'avg_pool2d_backward.<locals>.<listcomp>c              	      s8   g | ]0}t |d   t d| d   d    d qS r  r  r   r  r8   r9   r?   T  s   r  c              	      sX  t d tj}t d tj}t d tj}t d tj}t d tj}t d tj}t t | ||}t t |||}	t t ||t t  tj|}
t t |	|t t tj|}t 	|t dtj}t 	|	t dtj}	t |
t  tj}
t |t tj}t t |
|t ||	}|S )z{
        This computes the scaling factor that we will divide an element
        by when `count_include_pad=False`
        r   r   )
r2   r"  rF   r  r  r   r  rE   r  r  )r)  r*  Zstride_hZstride_wZpad_hZpad_wZkernel_hZkernel_wZhstartZwstartZhendZwendZdivide_factor)r4  r  r  rP  r0  r8   r9   !compute_pool_size_without_paddingh  s,    

z>avg_pool2d_backward.<locals>.compute_pool_size_without_paddingc                    s\  | ^ }}}|d  }|d  }t t|d  
d  
d tj}t t|d  
d  
d tj}t t|
d d tj}t t|
d d tj}t |t dtj}t |t dtj}t |t tj}t |t 	tj}d }tD ]6}	tD ]$}
t 	|t |	tj}t 	|t |
tj}d k	r`}n(sls~d d  }n
 ||}t 
|t jt |t |t dtjddt jt |t |t dtj	ddf|}t t ||t ||}|d kr,t ||t dtj}nt |t 	|||}qq|d k	sXt|S )Nr   r   Fre  r  )r2   r  r   rF   r  r  r"  r  r   rE   truedivrT  r  r#  r!  r  r  r_   )r0  r  r  r  r"  r#  r$  r%  r&  r'  r(  r)  r*  r  partr%  )rY  rW  rS  r  r,  rX  r  r  r-  r.  rP  r/  r8   r9   rK     sv      

  


zavg_pool2d_backward.<locals>.fnr   )r_   r@   r+   rm   rn   r  r	  r   rA   rb   re  r   fallback_avg_pool2d_backwardr(   r   r   )r  r>   r  rP  r  r  rW  rS  r  r  r  r  r  rv   r;  r\   r  rK   rQ  r8   )rY  rW  rS  r  r,  rX  r4  r  r  r-  r.  rP  r/  r0  r9   avg_pool2d_backward&  sf    
"Ar]  c                 C   s   |   }t|tr|g}n|s*tt|}t|dkrTt|dksPtd| g S t|}tt|D ]j}|| dk r||  t|rt|nd7  < d||   krt|k shn t|dkr|| dkshtqhtt|t|kstd|S )Nr   )r8   )r   )r   zinvalid axis: r   zreduction axis not unique)	rn   r@   r]   r   rm   rB   r_   rA   rC   )r>   axisrI  r~   r8   r8   r9   _validate_reduction_axis  s    
 <r_  c          
         s   |d k	rt | |} |  tt| |}g }g g }g ttD ]>}||krj| ||  qD| ||  qD fdd}rt}	D ]}t	d|	|< qn|}	| 
  t|  |p|  |  ||	|dS )Nc                    s   t |t kstrXt  t ks,tt fddD sFt fddD  t  t ksltd gt  t |  }tt t|D ]\}}|||< q|S )Nc                 3   s   | ]} | d kV  qdS r  r8   r   r   r8   r9   r     s     z8_make_reduction_inner.<locals>.loader.<locals>.<genexpr>c                    s   g | ]} | qS r8   r8   r   r   r8   r9   r?     s     z9_make_reduction_inner.<locals>.loader.<locals>.<listcomp>)rm   r_   r   r   chainr   )r   Zreduction_indexrj  r0  varZinner_loaderkeepdimsZkept_idxZreduced_idxrI  r   r9   rh    s     
z%_make_reduction_inner.<locals>.loaderr   )r   	dst_dtype	src_dtyper   r   reduction_ranges)r   rn   rC   r_  r   rm   ry   rA   rc   r   r   dictr   rb   )
r>   r^  rc  r\   r   Z
kept_sizesZreduced_sizesr~   rh  r;  r8   rb  r9   _make_reduction_inner  s:    



rh  )r  c                    s   dd d fdd}|S )NFr[   c                   s@   t | ||| d}tjf di|}t|jjtr<|  |S )Nr^  rc  r\   r   r  )rh  r)   r   r@   r   r   )r>   r^  rc  r\   r   rT  r   r  r8   r9   r     s     zmake_reduction.<locals>.inner)NFr8   )r  r   r   r8   rj  r9   make_reduction  s    rk  c                   s   |d k	rt | |} |   t| |}|  }|tjtjfkrHt | tj} t| ||}t	 fdd|D }t
||  |  }t|t| }t t|||S )Nc                 3   s   | ]} | V  qd S rM   r8   r   rI  r8   r9   r   0  s     zmean.<locals>.<genexpr>)r   rn   r_  rb   rF   float16bfloat16r   sum_r1   r   r$   r   r#   r   rA   rI  )r>   r^  keepdimr\   Zoutput_dtype
sum_resultdenomr8   rl  r9   r  %  s    

r  c           
         s   |d krd}|    t| |}t| |dd}|r8|  tt| |}t|||}t fdd|D }|rt|| }t	|| 
 |  }t|t|  }t||}	|s|	S |r|nt||}|	|fS )Nr   T)rp  c                 3   s   | ]} | V  qd S rM   r8   r   rl  r8   r9   r   C  s     z var_mean_sum_.<locals>.<genexpr>)rn   r_  r  r   squarer  ro  r1   r   r$   rb   r   r#   r   rA   rI  r  )
r>   r^  
correctionrp  return_meanZx_meanZdiffsrq  rr  Zx_varr8   rl  r9   var_mean_sum_6  s&    

rv  c                 C   sV   t | |}t| ||d d d}|d }t|d }t|tjoTt|tjk oTt|dkS )Nri  r   rf  r   )	r_  rh  r1   r@   rc   r   r]   r   Zunroll_reductions_threshold)r>   r^  rp  r   r   Zreduction_numelr8   r8   r9   use_two_step_varianceP  s    
    
rw  c                   s    d krd t | ||d d d}|d}|d |d tjjf |fd|  d|\}}}	|  |  |  t| |}t	fdd	|D d
d  fdd}
t
|
|}|r|  ||fS |S )Nr   ri  r   rd  re  Zwelford_reduce)Z	inner_fnsr  r\   c                 3   s   | ]} | V  qd S rM   r8   r   rl  r8   r9   r   v  s     z$var_mean_welford_.<locals>.<genexpr>c                 S   s6   t | tjr*|  s*tt| tj|S t	| |S rM   )
r@   rc   r   Zis_constantr2   r   r  rF   r  r"  r   r8   r8   r9   get_constant_or_index_exprx  s    z5var_mean_welford_.<locals>.get_constant_or_index_exprc                    s     }}| ||  S rM   r8   )r   r  r  )rt  r\   rx  rnumelr8   r9   r  }  s    

z#var_mean_welford_.<locals>.scale_fn)rh  r^  r   ZWelfordReductionr   rb   r   rn   r_  r1   r   )r>   r^  rt  rp  ru  r   rh  r  m2rv   r  ra  r8   )rt  r\   rx  ry  rI  r9   var_mean_welford_`  s>        



r{  )rt  rp  c                C   s2   t | ||dr t| |||ddS t| |||ddS )Nr^  rp  Fr^  rt  rp  ru  rw  rv  r{  r>   r^  rt  rp  r8   r8   r9   var_  s            r  c                C   s2   t | ||dr t| |||ddS t| |||ddS )Nr|  Tr}  r~  r  r8   r8   r9   var_mean  s            r  c                 C   st   |dk rt t| | |S |dkr0td|S |dkr<| S t | |d |}t||}|d dkrpt|| }|S )Nr   r   r   )pow_recursiver2   
reciprocalr"  r   )r>   r3  r\   rT  r8   r8   r9   r    s    r  c                 C   s   t | |S rM   )r2   powr   r   r8   r8   r9   
pow_native  s    r  )r   c                    s8  t tr$tkr$t tS t tr>dkr>t S t trXdkrXt S tdd  fD }t|}t tod  k odk n  p|odk}|rވ   fdd	}t	j
    |  d
S t  tr dk rtdS  dkrt rtS |r.t S t S )Nr  r   c                 s   s"   | ]}t |tjr| V  qd S rM   )r@   r   r+   rb   r<   r8   r8   r9   r     s      zpow.<locals>.<genexpr>i    r   c                    s   t |   S rM   )r  rb   r  r   r   rh  r8   r9   rK     s    zpow.<locals>.fnr   r   )r@   r   r]   r  sqrtr   r   r   r   r(   r   r   rb   rn   r   r#  r   exp2fallback_powr  )r   r   r\   Zis_integer_powZembed_exponentrK   r8   r  r9   r    s8    
"


r  c                 C   s   t | tr| j}n| }t |tr&|j}t |tjsftj|  |  |	 | 
 dj}t |tjsftt |tjr| st |jtjs|  |j|_| S tj|| | S r  )r@   r+   r   r   Z
StorageBoxr(   r   r   rb   r   rn   r_   Zis_input_bufferZ	NopKernelr   r  Zrealize_into)changedr  Zchanged_datar8   r8   r9   rj    s.    

rj  c                 C   s   t | t| |S rM   )rj  r#  )r>   r8  r8   r8   r9   fill_  s    r  c                 C   s4   t ||  }t||  }t||  }t| |S rM   )r  r   r   rb   r   rn   rj  )dstrk  r  r8   r8   r9   r    s    r  c                 C   s   t | |S rM   )r2   floordivr  r8   r8   r9   r    s    r  c                 C   s   t | |S rM   )r2   truncdivr  r8   r8   r9   r    s    r  c                 C   s   t | ot |}t| ot|}|dkrP|r4td|rBt| |S tt| |S |dkr|rdtd|rrt| |S tt| |S t| |S )Nr(  z5floordiv operands can not be boolean at the same timer*  z5truncdiv operands can not be boolean at the same time)rg   ri   r_   r  r(  rI  r  r*  )r   r   Zrounding_modeZboth_integerZboth_booleanr8   r8   r9   div_mode  s    r  c                 C   s<   t | ot |}|rt| |S ttjj}t|| |S d S rM   )ri   logical_andr&   rZ  r   r  r   )r   r   Z	both_boolrK   r8   r8   r9   r   )  s
    
r   c                 C   s4   t | pt| }|rt| |S dd }t|| |S )Nc                  W   s
   t j|  S rM   rU  r   r8   r8   r9   rK   <  s    zdiv_prim.<locals>.fn)ri   rg   r  r   r   r   Zis_integralrK   r8   r8   r9   div_prim5  s
    
r  c                 C   s4   t | pt| }|rdd }ndd }t|| |S )Nc                 S   s   t | |S rM   )r2   modr  r8   r8   r9   rK   O  s    zfmod.<locals>.fnc                 S   s   t | |S rM   )r2   fmodr  r8   r8   r9   rK   T  s    )ri   rg   r   r  r8   r8   r9   r  I  s
    
r  c                 C   s:   |   }t|st|r&t| t } dd }t|| S )Nc                 S   s
   t | S rM   )r2   rsqrtrf   r8   r8   r9   _rsqrt`  s    zrsqrt.<locals>._rsqrt)rb   r   r   r   rF   r1  r   )r>   r\   r  r8   r8   r9   r  Z  s
    r  c                C   sB   t |  st|  r&|d kr&tj}td|d}|| |||dS )Nr  r   r[   r   rb   r   rF   r  rk  r>   r^  rc  r\   rK   r8   r8   r9   ro  f  s    

ro  c                C   sB   t |  st|  r&|d kr&tj}td|d}|| |||dS )Nr  r   r[   r  r  r8   r8   r9   r  q  s    

r  c                 C   s   t | tj} td| ||dS )Nr   r^  rc  )r   rF   rh   rk  r>   rq   rp  r8   r8   r9   
reduce_any|  s    r  c                 C   s2   |d k	r$t | ||dt| ||dfS t | d |dS Nr  )reduce_amaxreduce_argmaxr  r8   r8   r9   
reduce_max  s
    r  c                 C   s2   |d k	r$t | ||dt| ||dfS t | d |dS r  )reduce_aminreduce_argminr  r8   r8   r9   
reduce_min  s
    r  xor_sumre  rf  argmaxr   argmin
logical_or)r   r   c                 C   s   t | tjdS )Nrj   r  r   INT_TO_FLOATr   r8   r8   r9   register_pointwise_numeric  s     r  c                 C   s   t | tjddS )NT)rk   r  r  r  r8   r8   r9    register_pointwise_numeric_ldf64  s
    r  r	  logical_not)r   )rk   r   r   identityc                    s   t | d d fdd}|S )Nrj   c                     s*    | |}t || d  }t| d |S r   )r   rb   rj  )ru   r   rT  outplace_opr8   r9   rK      s    
zregister_inplace.<locals>.fn)r   )Zaten_opr  rK   r8   r  r9   register_inplace  s    
r  c                 C   s.   t jj }|d k	st| |jjjks*t| S rM   )rF   Z_guardsZTracingContextr  r_   Z	fake_modeZ	shape_envZvar_to_range)r   rf  re  Ztracing_contextr8   r8   r9   sym_constrain_rangeI  s    r  c                 C   s   |   | S rM   r  r   rq   r8   r8   r9   sym_sizeQ  s    r  c                 C   s   |   | S rM   )r1  r  r8   r8   r9   
sym_strideV  s    r  c                 C   s   |   S rM   )r  rG  r8   r8   r9   	sym_numel[  s    r  c                 O   s   t dd S )NzHelpful for debuggingr4   )r  ru   r   r8   r8   r9   foobard  s    r  c                 C   s   |    t| S rM   )r   r   rf   r8   r8   r9   _realizei  s    r  c                 C   s   t tj| S rM   )r+   r   r   ZWait)r   r8   r8   r9   waitt  s    r  c                 C   s   t j| ||||S rM   )r   Z	AllReducer   r   Z	reduce_optagranks
group_sizer8   r8   r9   	allreducex  s    r  c                 C   s   t tj| |||S rM   )r+   r   r   ZAllGatherIntoTensor)Zshardr  r  r  r8   r8   r9   all_gather_into_tensor|  s    r  c              	   C   s   t tj| ||||S rM   )r+   r   r   ZReduceScatterTensorr  r8   r8   r9   reduce_scatter_tensor  s    r  c                 C   s   t j| ||||S rM   )r   ZAllReduceCoalescedr   r  r8   r8   r9   all_reduce_coalesced  s    r  c                 C   s"   t j| |||}tttj|S rM   )r   ZAllGatherIntoTensorCoalescedr   rA   mapr+   )r  r  r  r  rT  r8   r8   r9    all_gather_into_tensor_coalesced  s    r  c                 C   s$   t j| ||||}tttj|S rM   )r   ZReduceScatterTensorCoalescedr   rA   r  r+   )r  ZreduceOpr  r  r  rT  r8   r8   r9   reduce_scatter_tensor_coalesced  s        r  zRInductor support for distributed collectives depends on building torch.distributedr  )quantized_lowerings)N)NNNNF)F)F)F)N)N)N)r   r   r>  r   )N)N)N)r   )r   r   r   )r   r   r   )r   r   r   )r   )r   )r   )r   )r   )T)T)NT)F)r   NNr   )NN)F)r   FF)F)F)F)N)r   )N)NN)NNN)NN)r   )Nr  )Nr   r   F)NNNN)r8   r   FTN)N)N)NF)N)N)F)N)NF)NF)NF)NF)NF(z  r   r   loggingr  r  collectionsr   collections.abcr   typingr   r   r   r   r   rc   rF   Ztorch.fxZtorch.utils._pytreeutilsZ_pytreer  Ztorch._prims_commonr	   r
   r   r   r   r   r   r   r   r   r   Z%torch.fx.experimental.symbolic_shapesr   r   r   Ztorch.utils._sympy.functionsr   r   r   Z_dynamo.utilsr    r   r   r   r    decompositionr!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   Zvirtualizedr2   r3   	getLoggerr  r  rx   rN   rC   r   rZ  Ztr_c10dr  rD   r   r:   r;   rP   rR  rL  r]  ZbmmZconvolutionZconvolution_backwardr  r5  mmr  Zupsample_bicubic2dZ_int_mmrV  Zint8Zint16r  r  rm  r  r   Z	complex32Z	complex64rh   rn  r^   r]   r`   rg   ri   rw   r{   r   r   r   r   r   r   r   r   r   r\   r   Zconvert_element_typer   r   r  r   r   r  Z
device_putr  r  r
  r  r   aliasdetachZdetach_ZliftZview_ofr  rl   r  r  r  r   r!  r%  r&  r(  r)  r*  r   r1  r4  r<  Z_unsafe_viewZreshaper=  slicerE  rK  rB   rS  rU  r[  rh  ri  rl  rm  rp  rq  rr  ry  r-  r{  r?  r}  r  r\  	lru_cacher  r  r  r  r  r  r  r  r  r  Zrngprimsr  r  r  Z	bernoullir  r  r  r  r  r  r  r  r  randintZforce_stride_orderr  r  r  r  r  Zlookup_seedr  randomrZ  r  r  r  r	  r
  rY  r  ZFALLBACK_ALLOW_LISTZ_adaptive_avg_pool2d_backwardZ
_cudnn_rnnZ_cudnn_rnn_backwardZcumsumZcumprodZ_embedding_bagZ_embedding_bag_forward_onlyZ_flash_attention_forwardZ_flash_attention_backwardZ_fused_moving_avg_obs_fq_helperZ*_fused_moving_avg_obs_fq_helper_functionalZgrid_sampler_2d_backwardZrandpermZ'_scaled_dot_product_efficient_attentionZ0_scaled_dot_product_efficient_attention_backwardZ#_scaled_dot_product_flash_attentionZ,_scaled_dot_product_flash_attention_backwardsortZstableZ(_sparse_coo_tensor_with_dims_and_tensorsZ_thnn_fused_lstm_cellZtopkZupsample_bicubic2d_backwardr  Zupsample_linear1dZupsample_trilinear3dZupsample_linear1d_backwardZupsample_trilinear3d_backwardZ_adaptive_avg_pool3dZadaptive_max_pool2dZadaptive_max_pool3dZaddbmmZaddmvZ_addmm_activationZ
avg_pool3dZ
block_diagZ_cdist_forwardZcummaxZcumminZdigammaZ_efficientzerotensorZ*_embedding_bag_per_sample_weights_backwardZfractional_max_pool2dZfractional_max_pool3dfrexpZgeqrfZhistcZi0ZigammaZigammacisinZkthvalueZlinalg_cholesky_exZlinalg_crossZ_linalg_detZlinalg_householder_productZlinalg_inv_exZlinalg_ldl_factor_exZlinalg_ldl_solveZ	linalg_luZlinalg_lu_factor_exZlinalg_lu_solveZlinalg_matrix_expZ	linalg_qrZ_linalg_slogdetZ_linalg_solve_exZlinalg_solve_triangularZ_linalg_svdZlogcumsumexpZ	lu_unpackZmax_pool3d_with_indicesZmax_unpool2dZmax_unpool3dZmedianr  Z	nanmedianZormqrZ_pdist_forwardZpixel_shuffleZpixel_unshuffleZ	polygammaputZreflection_pad1dZreplication_pad1dresizeZresize_Z	resize_asZ
resize_as_ZsearchsortedZspecial_airy_aiZspecial_bessel_j0Zspecial_bessel_j1Zspecial_bessel_y0Zspecial_bessel_y1Zspecial_chebyshev_polynomial_tZspecial_chebyshev_polynomial_uZspecial_erfcxZspecial_hermite_polynomial_hZspecial_hermite_polynomial_heZspecial_i0eZ
special_i1Zspecial_i1eZspecial_laguerre_polynomial_lZspecial_modified_bessel_i0Zspecial_modified_bessel_i1Zspecial_modified_bessel_k0Zspecial_modified_bessel_k1Zspecial_ndtriZ!special_scaled_modified_bessel_k0Z!special_scaled_modified_bessel_k1Zspecial_spherical_bessel_j0Zspecial_zetaZtakeZ
_trilinearuniformZunsafe_splitZvdotZ_adaptive_avg_pool3d_backwardZadaptive_max_pool2d_backwardZadaptive_max_pool3d_backwardZavg_pool3d_backwardZ_cdist_backwardZ_embedding_bag_dense_backwardZfractional_max_pool2d_backwardZfractional_max_pool3d_backwardZ_linalg_check_errorsZ max_pool3d_with_indices_backwardZ_pdist_backwardZreflection_pad1d_backwardZreplication_pad1d_backwardZsoft_margin_loss_backwardZlinalg_pinvZatol_rtol_tensorZsegment_reducer  Z_segment_reduce_backwardZangleZcholesky_inverseZcholesky_solveZ_fft_r2cZ	histogramZbin_ctZ_histogramdd_bin_edgesZ_histogramdd_from_bin_ctsZindex_reduceZmasked_scatterZ	to_sparseZ
_to_sparseZtriangular_solvegcdZ_linalg_eighro   r=  Z_primsZ	rng_primsZrun_and_save_rng_stateZrun_with_rng_stateZexponentialr   r   r  r  r  r'  r(  r  Zscalar_tensorr2  Z
LongTensorr4  r5  r9  r#  r<  r:  r;  r  Z
empty_likerN  Z
zeros_likerH  rK  rB  rN  rR  rS  rW  r\  rb  rp  r   rt  rx  rz  r{  r~  ru  ry  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r   r  r  r  r   r	  r  r3  r;  rA  rR  rM  rT  rV  r\  r_  rh  rk  r  rv  rw  r{  ra  r  r  r  r  r  r  rj  r  r  r  r  rI  r  r   r  Ztrue_divideZTensorr  r  r  r  ro  r  r   r  re  r  rf  r  r  r  r  r  r  r  r  r  r  rE   r  r  expr  expm1Zrelur|  r  rs  r  cossinr  Zbitwise_andZbitwise_left_shiftZbitwise_notZ
bitwise_orZbitwise_right_shiftZbitwise_xorlgammaerfZspecial_erflog1ptantanhr  r  r  logical_xorr  r  Z	clamp_minZ	clamp_maxnegr  	remaindersignZsignbitr  r!  r   r  r  necoshsinhacosacoshasinasinhatan2atanatanhcopysignerfcZerfinvhypotlog10Z	nextafterZ_foreach_addZScalarZ_foreach_mulZ_foreach_subZ_foreach_negZ_foreach_powZScalarAndTensorZ_foreach_divZ_foreach_sqrtZ_foreach_maximumZ_foreach_reciprocalZ_foreach_signZ_foreach_copyr  Zadd_Zbitwise_and_Zbitwise_left_shift_Zbitwise_not_Zbitwise_or_Zbitwise_right_shift_Zbitwise_xor_Zmul_Zdiv_ZTensor_modeZlogical_and_Zlogical_not_Zlogical_or_Zlogical_xor_Zsub_Zrelu_Zsigmoid___and__
__lshift____or__
__rshift____xor____iand____ilshift____ior____irshift____ixor__r  r  r  r  r   methodfuncr  Z_inductor_testr   r  Z)torch.distributed._functional_collectivesZc10d_functionalZwait_tensorr  Z
all_reducer  r  r  r  r  r  ImportErrorr  r  r  Zregister_quantized_opsr8   r8   r8   r9   <module>   sh  44
		"4
"    
8M

-















/
:,
	


		  Y
%
&
	
	










     .

















I
8











   
   %-	

	]&"e &"     
 l

T6( &      P 
C      0    ]	 (2
*
*











  
 
 












