U
    9%evx                     @   sr  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlZd dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlZd dlZd dl m!Z!m"Z" d dl#m$Z$m%Z%m&Z& ddl'm(Z( dd	l)m*Z*m+Z+ e,e-Z.ed
Z/eej0ej0f Z1dd Z2e3de4dddZ5e3de4dddZ6dd Z7eeej8 e9f ej8dddZ:dd Z;dd Z<ee/ ee/ dddZ=e>e>e>ddd Z?e>e>d!d"d#Z@eee>ejAf  eej0 d$d%d&ZBeee>ej0f  eee>ejAf  d$d'd(ZCd)d* ZDd+d, ZEded-ef e>eFd.d/d0ZGdd4d5ZHd6d7 e!_Id8d7 e"_Iee9d9d:d;ZJeee9 d<d=d>ZKe>dd?d@ZLdAdB ZMdCdD ZNdEdF ZOdGdH ZPdIdJ ZQdeejRjS eejRjS dKdLdMZTdNdO ZUej0e9dPdQdRZVe9ejWdSdTdUZXej0eeef ej0dVdWdXZYej0e9dYdZd[ZZej0e9d\d]d^Z[d_d` Z\e j]dadbdcdddege^ e^ e^ e^ gdfZ_ej`ddgdhZaee> ddidjZbe3dkdldm ZcG dndo doeZdG dpdq dqZeG drds dsZfe3ddtdu ZgdvdwdxdyZhdzd{ ZiG d|d} d}Zjd~d Zkdd Zlej`dd ZmdddZndd Zod dejpe>e>dddZqdddZrdd Zsdd Ztdd Zudd Zvej0ejwdddZxej`dd Zydd ZzdZ{zd dl|Z|W n e}k
r   dvZ{Y nX dd Z~dd Zdd Zdd Zdd Ze3ddd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )    N)StringIO)AnyCallableDictIterableList
NamedTupleOptionalSetTypeVarUnion
ValuesView)mock)immutable_dictimmutable_list)CleanDivFloorDivModularIndexing   )config)current_deviceget_device_capability_Tc                  O   s:   t d dd }| \}}||kr,d||< || |d S )Nc                  S   sP   zddl m}  W n tk
r,   tdY nX | t| jdd k	rJdndfS )Nr   )do_benchzrequires TritonZ	quantilesZpercentiles)triton.testingr   ImportErrorNotImplementedErrorinspect	signature
parametersget)triton_do_bench r"   T/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/torch/_inductor/utils.pyload_triton0   s    	zdo_bench.<locals>.load_triton)g      ?g?g?r   )	functools	lru_cache)argskwargsr$   r!   Zquantile_field_namer"   r"   r#   r   /   s    

r   returnc                  C   sF   t j sdS zdd l} | d k	o(t dkW S  tk
r@   Y dS X d S )NFr   )   r   )torchcudais_availabletritonr   r   )r/   r"   r"   r#   
has_tritonN   s    
r0   c                  C   sF   z*ddl m}  | d k	o(tttjdd dW S  tk
r@   Y dS X d S )Nr   	roi_alignZtorchvisionr2   F)Ztorchvision.opsr2   hasattrgetattrr,   Zopsr   r1   r"   r"   r#   has_torchvision_roi_alignZ   s    
 r5   c                  G   s   t tjdd | D S )Nc                 S   s   g | ]}|r|qS r"   r"   .0xr"   r"   r#   
<listcomp>g   s      z'conditional_product.<locals>.<listcomp>)r%   reduceoperatormul)r'   r"   r"   r#   conditional_productf   s    r=   )devicer*   c                 C   sP   | d krt djS t| tr(t | } | jdkrL| jd krLt jdt dS | S )Ng        r-   )index)r,   tensorr>   
isinstancestrtyper?   r   )r>   r"   r"   r#   decode_devicej   s    

rD   c                 C   s   t tj| tdS )Nr   )r%   r:   r;   r<   sympyIntegeritr"   r"   r#   sympy_productt   s    rI   c                 C   s2   t | t |kstttdd t| |D S )Nc                 s   s   | ]\}}|| V  qd S Nr"   )r7   abr"   r"   r#   	<genexpr>z   s     zsympy_dot.<locals>.<genexpr>)lenAssertionErrorrE   expandsumzip)Zseq1Zseq2r"   r"   r#   	sympy_dotx   s    rS   )rH   r*   c                 C   s   dd | D   S )Nc                 S   s   i | ]}t ||qS r"   )idr6   r"   r"   r#   
<dictcomp>~   s      zunique.<locals>.<dictcomp>)valuesrG   r"   r"   r#   unique}   s    rW   )numerdenomr*   c              	   C   sF   t | trt |ts:t|  dt|  d| dt| | |   S )Nz: , )rA   intrO   rC   )rX   rY   r"   r"   r#   ceildiv   s      r\   )nr*   c                 C   s`   | dkst d| d8 } | | d? O } | | d? O } | | d? O } | | d? O } | | d? O } | d7 } | S )z9Return the smallest power of 2 greater than or equal to nl        z32-bit onlyr               )rO   r]   r"   r"   r#   next_power_of_2   s    rc   )lstr*   c                 C   s   dd | D S )z
    Gets the shape and stride of a tensor. For non-symbolic tensors, this is
    trivial. But for symbolic tensors, we need to map from SymIntNode into
    sympy.Expr.
    c                 S   s*   g | ]"}t |tjr|jjnt|qS r"   )rA   r,   SymIntnodeexprrE   rF   r7   ir"   r"   r#   r9      s    z-convert_shape_to_inductor.<locals>.<listcomp>r"   rd   r"   r"   r#   convert_shape_to_inductor   s    rk   c                    s   ddl m   fdd| D S )zz
    Takes a list of shapes from Inductor and converts them into symints (or just
    ints if all shapes are static).
    r   Vc                    sB   g | ]:}t |tr|n&t |tjr*t|n jjjj|d dqS )N)hint)rA   r[   rE   rF   graphZsizevarsZ	shape_envZcreate_symintnoderh   rl   r"   r#   r9      s   

z+convert_shape_to_symint.<locals>.<listcomp>)Zvirtualizedrm   rj   r"   rl   r#   convert_shape_to_symint   s    
rp   c           
      C   s   t j }g }g }t|D ]@\}}t|t jrP||d|  || q|| qtdd |	 D svt
|| t||}t| jjdkrt| jjd jdkr|f}|| t ji |}	|	|fS )Nargc                 s   s   | ]}t |tj V  qd S rJ   )rA   r,   Tensorr6   r"   r"   r#   rM      s     z$gen_gm_and_inputs.<locals>.<genexpr>r   r   rr   )r,   fxZGraph	enumeraterA   rr   appendplaceholderallrV   rO   call_functiontuplerN   Z_schemaZreturnsrB   rC   outputZGraphModule)
targetr'   r(   gZg_argsZa_argsr]   rq   rf   gmr"   r"   r#   gen_gm_and_inputs   s$    

r~   c                   C   s   t j rt j  d S rJ   )r,   r-   r.   synchronizer"   r"   r"   r#   r      s    
r   .)modeltimesr*   c                 C   sP   t   td t }t|D ]}| | }t   q t }|d k	sHt|| S )Ni9  )r   r,   Zmanual_seedtimeperf_counterrangerO   )r   Zexample_inputsr   t0_resultt1r"   r"   r#   timed   s    
r   r"   
         ?c                    s>   t  fddt|D }t |}t|| d |S )Nc                    s   g | ]}t  qS r"   )r   )r7   r   r'   fnr   r"   r#   r9      s     z%print_performance.<locals>.<listcomp>z.6f)r,   r@   r   Zmedianprint)r   r'   r   repeatZbaselineZtimingsZtookr"   r   r#   print_performance   s     
r   c                 C   s   t t|  S rJ   )hashry   itemsselfr"   r"   r#   <lambda>       r   c                 C   s   t t| S rJ   )r   ry   r   r"   r"   r#   r      r   objmethodc                    s$   t | |  t| | fdd dS )zKReplace obj.method() with a new method that returns a precomputed constant.c                      s    S rJ   r"   r"   r   r"   r#   r      r   z#precompute_method.<locals>.<lambda>N)r4   setattrr   r"   r   r#   precompute_method   s    r   )r   methodsc                 C   s   |D ]}t | | qdS )zFReplace methods with new methods that returns a precomputed constants.N)r   )r   r   r   r"   r"   r#   precompute_methods   s    r   c                 C   s   t | |kt | |k  S rJ   )r[   )rK   rL   r"   r"   r#   cmp   s    r   c                 C   s*   t | dkr"t| | d g| S | S d S )Nr   r   )rN   rC   )r8   sizer"   r"   r#   pad_listlike   s    r   c                    s*   d j  dt  fdd}|S )N___cachec                    s$   t | st|  |  t| S rJ   )r3   r   r4   r   r   keyr"   r#   wrapper  s    
zcache_on_self.<locals>.wrapper)__name__r%   wraps)r   r   r"   r   r#   cache_on_self  s    r   c                 C   sN   ddl m} t| tr2ttjdd | D t S t| |j	rD| j
S t S d S )Nr   irc                 S   s$   g | ]}t |d r|jr|jjqS )rf   )r3   rf   origins)r7   rf   r"   r"   r#   r9     s   
 z%aggregate_origins.<locals>.<listcomp>) r   rA   listr%   r:   r;   or_setZExternKernelr   )node_scheduler   r"   r"   r#   aggregate_origins  s    
	r   c                 C   s   t | }|dkr,dd |D }tt|}n|dkrg }|D ]X}|jdkr<d|jkr<t|jd d tr~||jd d  q<||jd d j q<tt|}n|dkrd	d |D }nt	|}d

dg| S )Noriginal_atenc                 S   s.   g | ]&}|j d krd|jkr|jd jjqS )rx   r   )opmeta_overloadpacketr   r7   originr"   r"   r#   r9   $  s   
 
z)get_fused_kernel_name.<locals>.<listcomp>r,   rx   Z	source_fnr   Zinductor_nodec                 S   s   g | ]}|j d kr|jqS rx   )r   namer   r"   r"   r#   r9   5  s    
 r   Zfused)r   sortedr   r   r   rA   rB   ru   r   r   join)r   Zdescriptive_namesall_originssourcesr   r"   r"   r#   get_fused_kernel_name   s*    r   c                 C   s  t | }dd |D }tt}tt}|D ]Z}d|jkr\t|jd j}|| |j d|jkr.|jd d d }|| |j q.|j	 dd
t|  dd
t|  d	}g }	t| D ].\}
}|	|j	 d
|
 dd
t|  q|d
|	fS )Nc                 S   s   g | ]}|j d kr|qS r   )r   r   r"   r"   r#   r9   @  s     
 z'get_kernel_metadata.<locals>.<listcomp>r   Z	from_noder   z Source Nodes: [rZ   z], Original ATen: [] z => 
)r   collectionsdefaultdictr   r   rB   r   ru   r   commentr   r   keysr   )r   r   r   Zinductor_nodesZfrom_node_dictZoriginal_aten_dictrf   r   metadataZdetailed_metadataZoriginal_nodenodesr"   r"   r#   get_kernel_metadata>  s&    



2r   )initial_queuer*   c                 C   sX   t | } t| }| rT|  }|jD ].}|r4||r4q"||kr"|| | | q"q|S )zJReturns the set of nodes whose values depend on those within initial_queue)r   r   popZusersaddru   )r   Zskip_filterZdominated_setrf   userr"   r"   r#   dominated_nodesX  s    

r   c                    s\   dd l }ddlm   fddfdd| D }fdd| D }t|j|| S )	Nr   r   r   c                    sD   t |  jr| jS t |  jr,| jS t |  joBt |  jS rJ   )rA   	TensorBoxdataZ
StorageBoxZIRNodeZ	Pointwiserb   r   is_unrealized_noder"   r#   r   p  s
    

z*gather_origins.<locals>.is_unrealized_nodec                    s   g | ]} |r|j qS r"   r   )r7   valr   r"   r#   r9   w  s      z"gather_origins.<locals>.<listcomp>c                    s   g | ]} |r|j qS r"   r   )r7   rq   r   r"   r#   r9   x  s      )	itertoolsr   r   rV   r   chain)r'   r(   r   Zkwarg_originsZarg_originsr"   r   r#   gather_originsk  s    r   )rg   r*   c                 C   s   t | tjr| jS t | tjr0dtt| jS t | tj	rNdtt| jS t | t
ttfr| jj ddtt| j dS t| S )z
    Normal sympy str is very slow, this is a lot faster.  The result are
    somewhat worse, as it doesn't do as much simplification.  So don't
    use this for final codegen.
    z + z * (rZ   ))rA   rE   Symbolr   Addr   map	sympy_strr'   ZMulr   r   r   funcr   rB   )rg   r"   r"   r#   r   |  s    "r   )r   r*   c                 C   s    | d dkst tj| dddS )Nr   sT)integerZnonnegative)rO   rE   r   r   r"   r"   r#   sympy_symbol  s    r   )rg   replacementsr*   c                    s$   dd  |   fdd| D S )z=
    xreplace is faster than subs, but is way more picky
    c                 S   s   t | trt| S | S rJ   )rA   rB   r   )r   r"   r"   r#   promote_strings  s    
z#sympy_subs.<locals>.promote_stringsc                    s   i | ]\}} | |qS r"   r"   )r7   kvr   r"   r#   rU     s      zsympy_subs.<locals>.<dictcomp>)Zxreplacer   )rg   r   r"   r   r#   
sympy_subs  s    r   r?   prefixc                    s   t  fdd| jD S )Nc                 3   s   | ]}|j  V  qd S rJ   )r   
startswithr7   r   r   r"   r#   rM     s     z)free_symbol_startswith.<locals>.<genexpr>anyZfree_symbolsr   r"   r   r#   free_symbol_startswith  s    r   r?   patternc                    s   t  fdd| jD S )Nc                 3   s   | ]} |j kV  qd S rJ   r   r   r   r"   r#   rM     s     z"free_symbol_has.<locals>.<genexpr>r   r   r"   r   r#   free_symbol_has  s    r   c                 C   s`   dddddddh}t  r:|dd	d
ddddddddh | jjD ]}t|j|krB dS qBdS )Nz,aten._fused_moving_avg_obs_fq_helper.defaultz7aten._fused_moving_avg_obs_fq_helper_functional.defaultzaten.multinomial.defaultzfbgemm.dense_to_jagged.defaultz%fbgemm.jagged_to_padded_dense.defaultZrun_and_save_rng_stateZrun_with_rng_statezaten._unsafe_index_put.defaultzaten.index_put.defaultzaten.index_put_.defaultzaten.scatter.srczaten.scatter.reducezaten.scatter.value_reducezaten.scatter_add_zaten.scatter_add.defaultzaten.scatter_reduce.twozaten.scatter_reduce_.twozaten.scatter_reduce.two_outTF)r,   Z$are_deterministic_algorithms_enabledupdatero   r   rB   r{   )r}   Zforbidden_setrf   r"   r"   r#   has_incompatible_cudagraph_ops  s6    	r   instance_descriptorZdivisible_by_16Z
equal_to_1Zids_of_folded_argsZdivisible_by_8)defaultsc                 #   s   t  }tjtjd|i tj|d tjtjd iX dV  t	| trt
| dkshtdtj rt }|  fdd|D  W 5 Q R X W 5 Q R X W 5 Q R X dS )	z
    Contextmanager that provides a clean tmp cachedir for inductor.

    Optionally, pass a dict as 'cache_entries' to get a list of filenames and sizes
    generated with this cache instance.
    ZTORCHINDUCTOR_CACHE_DIRr/   ZTRITON_CACHE_DIRNr   z!expected empty cache_entries dictc              	      s,   i | ]$}d |kr|t jt j |qS )z.lock)ospathgetsizer   )r7   fZtriton_cache_dirr"   r#   rU     s    z(fresh_inductor_cache.<locals>.<dictcomp>)tempfileTemporaryDirectoryr   patchdictr   environr   r   rA   rN   rO   existslistdirr   )Zcache_entriesZinductor_cache_dirfilesr"   r   r#   fresh_inductor_cache  s"    
 


r  c                 C   s(   | j }tt| }ttt||ddS )NT)r   reverse)__getitem__r   rN   r   reversedr   )seqgetterZa_rr"   r"   r#   argsort  s    r  r`   c                 C   s   t jd| d S )Nr"   dtype)r,   emptyelement_sizer  r"   r"   r#   get_dtype_size  s    r  c                   @   s   e Zd ZU eed< dS )LineContextcontextN)r   
__module____qualname__r   __annotations__r"   r"   r"   r#   r    s   
r  c                   @   sn   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd ZdddZdddZdS )IndentedBufferr_   r   c                 C   s   g | _ || _d S rJ   )_lines_indent)r   initial_indentr"   r"   r#   __init__  s    zIndentedBuffer.__init__c                 C   s   t  }d}g }| jD ]p}t|tr4| }|d krPqnt|trP|||jf qt|ts^t|	| |	d |d|
d 7 }q| |fS )Nr   r   )r   r  rA   DeferredLineBaser  ru   r  rB   rO   writecountgetvalue)r   bufpZlinemapliner"   r"   r#   getvaluewithlinemap  s     




z"IndentedBuffer.getvaluewithlinemapc                 C   s   |   \}}|S rJ   )r$  )r   r   r   r"   r"   r#   r     s    zIndentedBuffer.getvaluec                 C   s   t  }| jD ]l}t|tr,| }|d kr8qnt|tr8qt|tsFt|drd||d d  q|| |d q|	 S )N\r   )
r   r  rA   r  r  rB   rO   endswithr  r   )r   r!  r#  r"   r"   r#   getrawvalue  s    




zIndentedBuffer.getrawvaluec                 C   s   | j   d S rJ   )r  clearr   r"   r"   r#   r)  0  s    zIndentedBuffer.clearc                 C   s
   t | jS rJ   )boolr  r   r"   r"   r#   __bool__3  s    zIndentedBuffer.__bool__c                 C   s   d| j | j  S )Nr   )r  tabwidthr   r"   r"   r#   r   6  s    zIndentedBuffer.prefixc                 C   sl   t |tr| j| nPt |tr:| j||   n.| r\| j|   |  n| jd d S Nr   )rA   r  r  ru   r  with_prefixr   stripr   r#  r"   r"   r#   	writeline9  s    

zIndentedBuffer.writelinec                 C   s   |D ]}|  | qd S rJ   )r1  )r   linesr#  r"   r"   r#   
writelinesC  s    zIndentedBuffer.writelinesr   c                    s   t j fdd}| S )Nc                	   3   s.    j  7  _ z
d V  W 5  j  8  _ X d S rJ   )r  r"   offsetr   r"   r#   ctxH  s    
z"IndentedBuffer.indent.<locals>.ctx)
contextlibcontextmanager)r   r5  r6  r"   r4  r#   indentG  s    zIndentedBuffer.indentFc                 C   s   t |trtd}|jD ],}t |ts|rt|t|t|  }qt	|rTd}|jD ]4}t |trv| j
| qZt| |t|d   qZn@t|}|r| }|sd S | }|dD ]}| | qd S )Ninfr   r   )rA   r  floatr  r  minrN   lstripmathisinfru   r1  r[   textwrapdedentrstripsplit)r   Z
other_coder/  rA  r#  r"   r"   r#   spliceR  s&    





zIndentedBuffer.spliceN)r   )r   )F)r   r  r  r,  r  r$  r   r(  r)  r+  r   r1  r3  r9  rD  r"   r"   r"   r#   r    s   


r  c                   @   sb   e Zd ZdZdd Zee dddZed ddd	Zd
d Z	dd Z
dd Zdd Zdd ZdS )r  z.A line that can be 'unwritten' at a later timec                 C   s   |  sd}|| _d S r-  )r/  r#  r0  r"   r"   r#   r  m  s    zDeferredLineBase.__init__r)   c                 C   s
   t  dS )zJReturns either self.line or None to indicate the line has been 'unwritten'Nr   r   r"   r"   r#   __call__r  s    zDeferredLineBase.__call__)r#  r*   c                 C   s
   t  dS )z3Returns a new deferred line with the same conditionNrE  r0  r"   r"   r#   	_new_linev  s    zDeferredLineBase._new_linec                 C   s   |  | | j S rJ   rG  r#  )r   r   r"   r"   r#   r.  z  s    zDeferredLineBase.with_prefixc                 C   s   |  | j S rJ   )rG  r#  r=  r   r"   r"   r#   r=  }  s    zDeferredLineBase.lstripc                 C   s   |  | j| S rJ   rH  )r   r?   r"   r"   r#   r	    s    zDeferredLineBase.__getitem__c                 C   s
   t | jS rJ   )r*  r#  r   r"   r"   r#   r+    s    zDeferredLineBase.__bool__c                 C   s
   t | jS rJ   )rN   r#  r   r"   r"   r#   __len__  s    zDeferredLineBase.__len__N)r   r  r  __doc__r  r	   rB   rF  rG  r.  r=  r	  r+  rI  r"   r"   r"   r#   r  j  s   r  c                 C   s(   t j| j}|dk r$td dS dS )NP   z,not enough SMs to use max_autotune_gemm modeFT)r,   r-   Zget_device_propertiesZmulti_processor_countlogwarning)r?   Zsmsr"   r"   r#   
is_big_gpu  s
    
rN  F)enable_int32c                C   st   t jt jt jg}|r(t jt jt jt jg}tjs:tjs:tjordtj	
 dkor| jjdkor| j|kort| jjppdS )NZTRITON,r-   r   )r,   float16bfloat16float32Zint32r   Zmax_autotuneZmax_autotune_gemmZsearch_autotune_cachemax_autotune_gemm_backendsupperrC  r>   rC   r  rN  r?   )ZlayoutrO  Zlayout_dtypesr"   r"   r#   use_triton_template  s     
rV  c                   C   s   dt j dkS )NZATENrP  )r   rT  rU  rC  r"   r"   r"   r#   use_aten_gemm_kernels  s    rW  c                   @   s.   e Zd ZedZdd Zdd Zdd ZdS )	DebugDirManagerr   c                 C   s   t tj| _d | _d S rJ   )nextrX  counterrT   prev_debug_namer   r"   r"   r#   r    s    zDebugDirManager.__init__c                 C   s0   t jjj| _| j d| j | _| jt jj_d S )NZ_tmp_)r,   _dynamor   debug_dir_rootr[  rT   new_namer   r"   r"   r#   	__enter__  s    zDebugDirManager.__enter__c                 G   s   t | j | jtjj_d S rJ   )shutilrmtreer^  r[  r,   r\  r   r]  )r   r'   r"   r"   r#   __exit__  s    zDebugDirManager.__exit__N)	r   r  r  r   r  rZ  r  r_  rb  r"   r"   r"   r#   rX    s   
rX  c              	      s\   ddl m} |j g  fdd}tj|d| tj  | ||}W 5 Q R X |fS )Nr   )GraphLoweringc              	      s0    | }t |j}|  W 5 Q R X |S rJ   )open__file__ru   read)r   modr   compile_to_modulesource_codesr"   r#   patched_compile_to_module  s    z3run_and_get_code.<locals>.patched_compile_to_moduleri  )	ro   rc  ri  r   r  objectr,   r\  reset)r   r'   r(   rc  rk  r   r"   rh  r#   run_and_get_code  s      
rn  c                 O   sH   t | f||\}}dt|  kr,dks@n tdt| |d S )Nr   r^   z%expected one or two code outputs got r   )rn  rN   rO   )r   r'   r(   r   rj  r"   r"   r#   run_and_get_triton_code  s     
ro  c              	   c   sD   ddl m} |j|  }zt|||j| < dV  W 5 ||j| < X dS )z~
    Override the lowering of aten_op with overide_fn.
    The first argument of override_fn is the original lowering fn.
    r   )loweringN)Ztorch._inductorrp  Z	loweringsr%   partial)Zaten_opZoverride_fnrp  orig_fnr"   r"   r#   override_lowering  s    

rs  c                    s4   ddl m} |j  fdd}tjj|d|S )zr
    Add hook functions to be called at the beginning and end of Scheduler.__init__.
    Used for unit tests.
    r   )	Schedulerc                    s&   | |  | |}r"| | |S rJ   r"   )Z	schedulerr   outrr  post_fnpre_fnr"   r#   r     s
    


z(add_scheduler_init_hook.<locals>.wrapperr  )Ztorch._inductor.schedulerrt  r  unittestr   r  rl  )rx  rw  rt  r   r"   rv  r#   add_scheduler_init_hook  s    rz  c                 C   s    t jrt|  n
t|  dS )z
    Warnings that will be actionable for PyTorch developers, but not
    end users.  Allows us to easily disable them in stable releases but
    keep them on for nightly builds.
    N)r   Zdeveloper_warningsrL  rM  infomsgr"   r"   r#   developer_warning  s    r~  num_in_out_args)r'   r  r*   c                    s   t  fddt|D S )z
    Return the total number of bytes the arguments of tensor type takes.

    For in/out args, tensor sizes are counted twice: once for reading and
    once for writing.

    The first num_in_out_args arguments are in out tensors.
    c                 3   s>   | ]6\}}t |tjr| |  d t| k   V  qdS r   N)rA   r,   rr   Znumelr  r[   )r7   ri   rq   r  r"   r#   rM     s   z get_num_bytes.<locals>.<genexpr>)rQ   rt   )r  r'   r"   r  r#   get_num_bytes  s    	r  r   c                 C   sx   | | dd|dd|dd| }z0dd l }| dkrR|dk rR|jj| |jj }W n tk
rr   td	 Y nX |S )
Nz.3fzms    	z GB 	 z7.2fzGB/sr   g~jt?i  z@Colorama is not installed. Install it if you want colored output)coloramaForeZREDRESETr   rL  rM  )msZnum_gbZgb_per_sr   suffixZinfo_strr  r"   r"   r#   create_bandwidth_info_str  s    $r  c                  C   s   z^t jd} | d tt jk r\tt j| d  dkr\t j| d  d dkr\t j| d  W S W n tk
rr   Y nX t jD ]"}|drz|tdd   S qzdS )a  
    An experimental API used only when config.benchmark_kernel is true.

    The benchmark name is only available at codegen time. So we can not
    directly call it in benchmark_all_kernels which is run after codegen.

    The function assumes the argument after --only is the benchmark name.
    It works for torchbench.py/hugginface.py/timm_models.py. But for ad-hoc
    scripts, this function may return None.

    There are 2 flavors of --only argument we need handle:
    1. --only model_name
    2. --only=model_name
    z--onlyr   r   -z--only=N)sysargvr?   rN   
ValueErrorr   )idxrq   r"   r"   r#   get_benchmark_name"  s    

r  c                 C   s   t dd | D S )Nc                 s   s   | ]}|d kV  qdS r  r"   r6   r"   r"   r#   rM   B  s     zis_ones.<locals>.<genexpr>rw   r   r"   r"   r#   is_onesA  s    r  c                 C   s   t dd | D S )Nc                 s   s   | ]}|d kV  qdS )r   Nr"   r6   r"   r"   r#   rM   F  s     zis_zeros.<locals>.<genexpr>r  r  r"   r"   r#   is_zerosE  s    r  c                 C   s   t dd | D S )Nc                 s   s*   | ]"}t |tjr|jtd kV  qdS )cpuN)rA   r,   rr   r>   )r7   itemr"   r"   r#   rM   J  s   z is_cpu_device.<locals>.<genexpr>r  )inputsr"   r"   r#   is_cpu_deviceI  s    r  )r   r*   c                 C   s*   t | tjstd| jr tjS tjS d S )Nz8only support sympy.Expr as input to get_sympy_Expr_dtype)rA   rE   ExprrO   
is_integerr,   Zint64Zfloat64)r   r"   r"   r#   get_sympy_Expr_dtypeQ  s     r  c              	   o   s0   | r&t jj||}|V  W 5 Q R X nd V  d S rJ   )r,   ZprofilerZprofile)Zshould_profiler'   r(   r"  r"   r"   r#   maybe_profile[  s    r  c                 C   s6   t | j }|d| jf |d| jf t|S )z~
    Convert triton config to a tuple that can uniquely identify it. We can use
    the return value as a dictionary key.
    	num_warps
num_stages)r   r(   r   ru   r  r  ry   )cfgr   r"   r"   r#   triton_config_to_hashabled  s    r  Tc                 C   s$   t s| S ttj| |  tjj S rJ   )HAS_COLORAMAr4   r  r  rU  r  )r}  colorr"   r"   r#   _color_textv  s    r  c                 C   s
   t | dS )Ngreenr  r|  r"   r"   r#   
green_text}  s    r  c                 C   s
   t | dS )Nyellowr  r|  r"   r"   r#   yellow_text  s    r  c                 C   s
   t | dS )Nredr  r|  r"   r"   r#   red_text  s    r  c                 C   s
   t | dS )Nbluer  r|  r"   r"   r#   	blue_text  s    r  c               
   C   s.   ddl m}  tjdtjdtdtd| jdi}|S )Nr   r   r[   ZDevicer*  r;  rr   )r   r   r,   r  r>   r*  r;  r   )r   ZPYTHON_TYPE_TO_SCHEMA_TYPEr"   r"   r#   python_type_to_schema_type  s         r  c                 C   s   |rd|  dS | S )Nz	Optional[r   r"   )schema_typeis_optional_argr"   r"   r#   may_get_optional_schema_type  s    r  c                 C   sh   t | tr6tdd | D r2td|}|t|kS dS | jt krdt | j }t||}|t|kS dS )Nc                 s   s*   | ]"}t |tp t |tjo |jV  qd S rJ   )rA   r[   rE   r   r  r6   r"   r"   r#   rM     s   ztype_match.<locals>.<genexpr>z	List[int]F)rA   r   rw   r  rB   	__class__r  )rq   Zarg_typer  Zmay_optional_schema_typer  r"   r"   r#   
type_match  s$    
  r  c                    sT  d}d}| j D ]"}| s"|d7 }|js|d7 }qt|}t|}d}dd }	dd   fdd}
t||kst|	t|||| j D ]}d }d	}||k r|jr d	S || }n|r|j|kr||j }d
}|d kr|
|s d	S |d k	r|j}t|| |s d	S |s|d7 }q|d kr0 |s8|d k	r|d8 }q|dkrPd	S d
S )Nr   r   c                 S   s6   ||kr d| d| d|  dS d| d|  dS d S )Nztakes from z to z positional arguments but z were givenztakes r"   )nargsmax_pos_argsmin_argsr"   r"   r#   args_error_message  s    z(schema_match.<locals>.args_error_messagec                 S   s   dt | jkS )Nr	   )rB   rC   rq   r"   r"   r#   is_optional  s    z!schema_match.<locals>.is_optionalc                    s    | p|   S rJ   )has_default_valuer  r  r"   r#   
allow_none  s    z schema_match.<locals>.allow_noneFT)	argumentsr  Z
kwarg_onlyrN   rO   r   rC   r  )schemar'   r(   r  r  argumentr  Zremaining_kwargsZarg_posr  r  r   Zis_kwdexpected_typer"   r  r#   schema_match  sT    

  







r  c                 C   s"   | D ]}t |||r|  S qd S rJ   )r  )Zschemasr'   r(   r  r"   r"   r#   try_find_schema  s    
r  c                 C   sd   ddl m}m} | tjtjtjfks(t| tjtjfkr@|| S tjj	j
jrV|tjS |tjS d S )Nr   )get_max_simd_tflopsget_max_tensorcore_tflops)r   r  r  r,   rQ  rR  rS  rO   backendsr-   matmulZ
allow_tf32)r  r  r  r"   r"   r#   get_device_tflops  s    
r  c                  C   s   ddl m}  |  S )Nr   get_dram_gbps)r   r  r  r"   r"   r#   get_gpu_dram_gbps
  s    r  c                 C   s
   |  dS )NZwelford)r   Zreduction_typer"   r"   r#   is_welford_reduction  s    r  c                 C   s   t | rdS dS )N   r   )r  r  r"   r"   r#   reduction_num_outputs  s    r  )r   )r"   r   r   r   )N)N)N)r   r   )r   r7  r%   r   r   loggingr>  r;   r   r`  r  r   r@  r   ry  ior   typingr   r   r   r   r   r   r	   r
   r   r   r   r   rE   r,   Ztorch.fx.immutable_collectionsr   r   Ztorch.utils._sympy.functionsr   r   r   r   r   Zcuda_propertiesr   r   	getLoggerr   rL  r   r  Z	VarRangesr   r&   r*  r0   r5   r=   r>   rB   rD   rI   rS   rW   r[   r\   rc   re   rk   rp   r~   r   r;  r   r   __hash__r   r   r   r   r   r   r   r   rs   Noder   r   r   r   r   r   r   r   r   
namedtuplery   r   r8  r  r  r  r  r  r  rN  rV  rW  rX  rn  ro  rs  rz  r~  rr   r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r"   r"   r"   r#   <module>   s   4
 

$


 

	 

i 
	






<