U
    9%e"                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZmZmZ d dlmZ d dlm Z  d dlm!Z! d dlm"Z" d dl
m#Z# d d	lm$Z$ d d
l%m&Z&m%Z% d dl'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z- d dl.Z.d dl/m0Z0m1Z1m2Z2 d dl3m4Z4 d dl5m6Z6m7Z7 e	j89e:Z;e	j8<e	j8<e;Z=e0> rd dl?m@Z@ d dlAmBZB d dlCmDZDmEZEmFZF ndd ZDdd ZEdd ZFdZGd aHdaIdd ZJdd ZKeLeMZNeOddd ZPdd  ZQG d!d" d"ZRG d#d$ d$eRZSG d%d& d&eRZTd'd( ZUdeVd*d+d,ZWdeVeVeVd-d.d/ZXde-eVeYf eVeVd1d2d3ZZde-eVeYf eVeVeVeVd4d5d6Z[eVe-eVeYf d7d8d9Z\ej]G d:d; d;Z^e^d<d=d>Z_d?d@ Z`eOdAdBdC ZadDdE ZbdFdG ZceOddHdI ZdG dJdK dKZeej]G dLdM dMeeZfej]G dNdO dOeeZgG dPdQ dQeeZheh Zief eg gZjeOddRdS ZkdTdU ZlddWdXZmddYdZZnd[d\ Zod]d^ Zpd_d` Zqdadb Zrdcdd Zsdedf ZteOddgdh ZueOddidj ZvdkeidkdkfeedldmdnZwdVdVdkeidkdkfeedldodpZxG dqdr drZyG dsdt dtZzejOdudv Z{dwdx Z|ddydzd{Z}G d|d} d}Z~G d~d dZG dd dZG dd dZdd Zdd ZG dd dZG dd dZe  dS )    N)bisect_right)FutureProcessPoolExecutorThreadPoolExecutor)cdll)field)partial)abc)Path)Thread)sleeptime)AnyCallableDictListSetUnion)configcuda_propertiesexc)developer_warning)	_Faketqdmtqdm)build_paths)_run_build_command)log_global_cache_statslog_global_cache_valsuse_global_cachec                  O   s   d S N argskwargsr    r    X/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/torch/_inductor/codecache.pyr   8   s    r   c                  O   s   d S r   r    r!   r    r    r$   r   ;   s    r   c                   C   s   dS NFr    r    r    r    r$   r   >   s    r   iX  c                   C   s   t d krt a d S r   )_t0r   r    r    r    r$   _compile_startI   s    r'   c                  C   s"   t d k	rt } t| t  7 ad a d S r   )r&   r   _cumulative_compile_time)t1r    r    r$   _compile_endO   s    r*   c                  C   s<   t jd} | d kr*t  dt  } t j| dd | S )NZTORCHINDUCTOR_CACHE_DIRz/torchinductor_Texist_ok)osenvirongettempfile
gettempdirgetpassgetusermakedirs)	cache_dirr    r    r$   r5   [   s
    r5   c                 C   sz   t jjd krdndt jjdd }dtjj tjj }| d| }tj	
t |}tj	
|| }tj|dd |S )	Ncpucu. py_Tr+   )torchversioncudareplacesysversion_infomajorminorr-   pathjoinr5   r4   )nameZcu_strpython_versionZbuild_foldercpp_wrapper_dirZcpp_wrapper_build_directoryr    r    r$   cpp_wrapper_cache_dird   s    
rI   c                   @   sf   e Zd Zeeddd Zeeddd Zeeddd Zdd	 Z	d
d Z
dd ZdS )	CacheBaseNc                  C   s   zdd l } | j}W n tk
r*   d }Y nX dtjtj jitjj|ddtj	jj
jid}ttj|ddd |d	< |S )
Nr   rF   )r>   triton
allow_tf32)devicer=   otherT)	sort_keysutf-8hash)rK   __version__ModuleNotFoundErrorr<   r>   Zget_device_propertiescurrent_devicerF   r=   backendsmatmulrL   hashlibsha256jsondumpsencode	hexdigest)rK   Ztriton_versionsystemr    r    r$   
get_systemt   s*    

  
zCacheBase.get_systemc                   C   s   t tjt dt d S )NcacherQ   )r
   r-   rD   rE   r5   rJ   r^   r    r    r    r$   get_local_cache_path   s    zCacheBase.get_local_cache_pathc                   C   s*   t jd k	r&ttjt jt d S d S )NrQ   )r   Zglobal_cache_dirr
   r-   rD   rE   rJ   r^   r    r    r    r$   get_global_cache_path   s    zCacheBase.get_global_cache_pathc                 C   s0   t j sd S t | _t | _t | _	d S r   )
r<   r>   Zis_availablerJ   r^   r]   r`   local_cache_pathra   global_cache_pathselfr    r    r$   __init__   s
    


zCacheBase.__init__c              	   C   s6   | j  si S t| j }t|}W 5 Q R X |d S Nr_   )rb   is_fileopenrY   load)re   Zlocal_cache_fplocal_cacher    r    r$   get_local_cache   s
    
zCacheBase.get_local_cachec                 C   sD   t j| jjs"t j| jjdd t| jtj| j	|ddd d S )NTr+   )r]   r_      )indent)
r-   rD   existsrb   parentr4   write_atomicrY   rZ   r]   )re   rk   r    r    r$   update_local_cache   s    zCacheBase.update_local_cache)__name__
__module____qualname__staticmethod	functools	lru_cacher^   r`   ra   rf   rl   rr   r    r    r    r$   rJ   s   s   	rJ   c                   @   s2   e Zd Zee dddZee edddZdS )
LocalCache)keysc                 G   s2   |   }|}|D ]}||kr&|| }q d S q|S r   )rl   )re   rz   r_   	sub_cachekeyr    r    r$   lookup   s    
zLocalCache.lookup)rz   valuec                G   sL   |   }|}|dd D ]}||i  || }q|||d < | | d S )Nr   )rl   
setdefaultrr   )re   r~   rz   r_   r{   r|   r    r    r$   	set_value   s    
zLocalCache.set_valueN)rs   rt   ru   r   strr}   r   r   r    r    r    r$   ry      s   ry   c                   @   s:   e Zd Zeddd Zeeeege	f dddZ
dS )PersistentCacheNc              	   C   s@   | j d ks| j  si S t| j }t|}W 5 Q R X |d S rg   )rc   rh   ri   rY   rj   )re   Zglobal_cache_fpZglobal_cacher    r    r$   get_global_cache   s
    z PersistentCache.get_global_cache)rF   inputs	benchmarkc                    s   t t| j}t t| j}i d fdd	}tjsDtjr|  }||st rj|| 	 |ds D ]D}	||	|	< |
i  | 
i  |	 |  |	 < qn| | t rfdd D }
||
 nt r|| 	 |d S )a  
        Check to see if we have benchmarked the given choice callers. For each
        choice caller:

            1. Check global_cache[name][inputs][choice], return benchmark if cached.
            2. Check local_cache[name][inputs][choice], return benchmark if cached.
            3.
                a. `max_autotune_gemm=True`: benchmark the choice, update
                    local_cache[name][inputs][choice], and return the benchmark.
                b. `max_autotune_gemm=False`: don't benchmark the choice, return nothing.
        Nc                    s^   d} D ]B}|  }|| i i krB|   | |< qd} qLq|rZ||d |S )z2Check if `cache` contains data for all the choicesTF)cached)hash_keyr/   )r_   callbackhitchoiceZchoice_hashchoicesr   rF   timingsr    r$   check_cache   s    
z+PersistentCache.lookup.<locals>.check_cache)r   c                    s   i | ]}|   | qS r    )r   ).0r   )r   r    r$   
<dictcomp>  s     z*PersistentCache.lookup.<locals>.<dictcomp>)N)r   r   r]   r   r   Zmax_autotuneZmax_autotune_gemmrl   r   r   r   r   rr   )re   r   rF   r   r   Z	log_statsZlog_valsr   rk   r   Ztimings_to_logr    r   r$   r}      s2    


zPersistentCache.lookup)rs   rt   ru   rw   rx   r   r   r   r   floatr}   r    r    r    r$   r      s   

r   c                  C   s.   t jt d} t j| s*t j| dd | S )NlocksTr+   )r-   rD   rE   r5   ro   r4   )lock_dirr    r    r$   get_lock_dir   s    r   r9   extrac                 C   sH   | }|dkr|d | }dt t|d d d d  S )Nr9   z||crP   3   )base64	b32encoderW   rX   r[   digestdecodelower)coder   Zhashing_strr    r    r$   	code_hash'  s    "r   )basename	extensionspecified_dirc                 C   sb   |r(t j|r|}q@t jt |}nt jt | dd }t j||  d| }| ||fS )N      r8   )r-   rD   isabsrE   r5   )r   r   r   subdirrD   r    r    r$   get_path3  s    r   r   contentr   	hash_typec                 C   s:   |dkst d|dkr"t| |S |dkr6tt| S d S )N)r   cubinzHash type not supportedr   r   )AssertionErrorr   reprr   r    r    r$   get_hash?  s
    
r   )r   r   r   r   r   c           	      C   sV   t | ||}t|||\}}}tj|s8tj|dd tj|sNt||  ||fS )NTr+   )r   r   r-   rD   ro   r4   rq   )	r   r   r   r   r   r|   r   r   rD   r    r    r$   writeG  s    
r   )rD   r   c              	   C   s   t |ttfstdt| } | jdt  dt	
  d }t |trNdnd}||}|| W 5 Q R X ||  d S )Nz6Only strings and byte arrays can be saved in the cacher8   z.tmpwwb)
isinstancer   bytesr   pathlibr
   rp   r-   getpid	threading	get_identri   r   rename)rD   r   Ztmp_pathZ
write_modefr    r    r$   rq   W  s     
 rq   c                   @   s   e Zd ZU dZdZeed< dZeed< dZe	ed< dZ
e	ed< dZeed< eedZee	 ed	< eedZee ed
< eedZee	 ed< eedZee ed< dZeed< edddZdd ZdS )CompiledFxGraphz!Class holding a compiled FX graphNcompiled_artifactcurrent_callable	cache_keyartifact_pathcache_linemap)default_factorydevice_typesdevice_idxsmutated_inputsmutated_input_idxs_boxed_callreturnc                 C   s   |   |S r   )get_current_callable)re   r   r    r    r$   __call__u  s    zCompiledFxGraph.__call__c                 C   s&   | j d krttt| S | j S d S r   )r   rw   r   _run_from_cacheweakrefproxyrd   r    r    r$   r   x  s    
z$CompiledFxGraph.get_current_callable)rs   rt   ru   __doc__r   r   __annotations__r   r   r   r   r   r   r   setr   r   r   intr   listr   r   boolr   r   r   r    r    r    r$   r   e  s   
r   )compiled_graphc                 C   sF   | j d kr<ddlm} || j| j| jd k	r2| jndj| _ |  |S )Nr   )PyCodeCacher    )r   Z	codecacher   load_by_key_pathr   r   r   call)r   r   r   r    r    r$   r     s    
r   c                  C   sB   t  rt S tt jjttfr0tt jj} n
t jjf} t	| S r   )
r   	is_fbcoder   gccr   cppcxxr   tuplecpp_compiler_search)searchr    r    r$   cpp_compiler  s    
r   r   c                 C   s   | D ]}zz|d krlt jdkr W qtds.W qddlm} t }|tj|dt	d}| t
 }W 5 Q R X t|dg |W   S  tjttfk
r   Y qY qX qt d S )NlinuxZTORCH_INDUCTOR_INSTALL_GXXr   FileLockzg++.locktimeout	--version)r@   platformr-   getenvfilelockr   r   rD   rE   LOCK_TIMEOUTinstall_gcc_via_conda
subprocesscheck_outputSubprocessErrorFileNotFoundErrorImportErrorr   ZInvalidCxxCompiler)r   r   r   r   lockr    r    r$   r     s(    

 
r   c               	   C   s   t jt d} t j| dd}t j|std t jdd}|dkrVt	
d}|dk	rtj|dd	|  d
ddddgtjd |S )z>On older systems, this is a quick way to get a modern compilerr   binzg++zDownloading GCC via condaZ	CONDA_EXEcondaNcreatez	--prefix=z--channel=conda-forgez--quietz-yz
python=3.8Zgxx)stdout)r-   rD   rE   r5   ro   loginfor.   r/   shutilwhichr   
check_callPIPE)prefixZcxx_pathr   r    r    r$   r     s*    


r   c                   C   s   t dt S )Nz(gcc|g\+\+))rer   r   r    r    r    r$   is_gcc  s    r   c                  C   s*   t  } t| dgd}d| d kS )Nr   utf8ZAppler   )r   r   r   r   
splitlines)r   version_stringr    r    r$   is_apple_clang  s    r  c                   @   s   e Zd ZU eed< eed< eed< eejef ed< dZ	dZ
dd Zejfejd	d
dZdd Zdd ZedddZeddd ZdS )VecISA
_bit_width_macro_arch_flags_dtype_nelementsa[  
#if defined(CPU_CAPABILITY_AVX512) || defined(CPU_CAPABILITY_AVX2)
#include <ATen/cpu/vec/functional.h>
#include <ATen/cpu/vec/vec.h>
#endif

__attribute__((aligned(64))) float in_out_ptr0[16] = {0.0};

extern "C" void __avx_chk_kernel() {
    auto tmp0 = at::vec::Vectorized<float>(1);
    auto tmp1 = tmp0.exp();
    tmp1.store(in_out_ptr0);
}
zG
import torch
from ctypes import cdll
cdll.LoadLibrary("__lib_path__")
c                 C   s   | j S r   )r  rd   r    r    r$   	bit_width  s    zVecISA.bit_width)dtypec                 C   s
   | j | S r   )r  )re   r  r    r    r$   	nelements  s    zVecISA.nelementsc                 C   s   | j S r   )r  rd   r    r    r$   build_macro  s    zVecISA.build_macroc                 C   s   | j S r   )r  rd   r    r    r$   build_arch_flags  s    zVecISA.build_arch_flagsr   c                 C   s   t t| S r   )rQ   r   rd   r    r    r$   __hash__  s    zVecISA.__hash__Nc           	      C   s  t jjd k	rt jjS ttjd\}}ddlm} t }|t	j
||d td}| |d d d }tt||d| d	}zFt||| tjtjd
tjd|gtjt	jddtj
id W n2 tk
r } zW Y W 5 Q R  dS d }~X Y nX W 5 Q R  dS Q R X d S )Nr   r   r   .lockr   soF)warning_allvec_isaz-cZ__lib_path__
PYTHONPATH:)stderrenvT)r   r   Z
vec_isa_okr   r  	_avx_coder   r   r   r-   rD   rE   r   shlexsplitcpp_compile_commandcompile_filer   r   r@   
executable_avx_py_loadr?   DEVNULLr.   	Exception)	re   r|   
input_pathr   r   r   output_pathZ	build_cmder    r    r$   __bool__  s:       
	"zVecISA.__bool__)rs   rt   ru   r   r   r   r   r<   r  r  r  r  r   r	  r
  r  r  rw   rx   r"  r    r    r    r$   r    s   
r  c                   @   sV   e Zd ZU dZdZdZejdejdej	diZ
edddZejZeegef ed	< d
S )	VecAVX512i   ZCPU_CAPABILITY_AVX512z0-mavx512f -mavx512dq -mavx512vl -mavx512bw -mfma       r   c                 C   s   dS )NZavx512r    rd   r    r    r$   __str__A  s    zVecAVX512.__str__r  Nrs   rt   ru   r  r  r  r<   r   Zbfloat16Zfloat16r  r   r&  r  r  r   r   r   r    r    r    r$   r#  :  s   
r#  c                   @   sV   e Zd ZU dZdZdZejdejdej	diZ
edddZejZeegef ed	< d
S )VecAVX2   ZCPU_CAPABILITY_AVX2z-mavx2 -mfma   r$  r   c                 C   s   dS )NZavx2r    rd   r    r    r$   r&  N  s    zVecAVX2.__str__r  Nr'  r    r    r    r$   r(  G  s   
r(  c                   @   sL   e Zd ZU dZdZdZi ZedddZdd Z	e
jZee
gef ed< d	S )
InvalidVecISAr   r9   r   c                 C   s   dS )NZINVALID_VEC_ISAr    rd   r    r    r$   r&  Z  s    zInvalidVecISA.__str__c                 C   s   dS r%   r    rd   r    r    r$   r"  ]  s    zInvalidVecISA.__bool__r  N)rs   rt   ru   r  r  r  r  r   r&  r"  r  r  r   r   r   r    r    r    r$   r+  T  s   
r+  c               
   C   sb   t jdkrg S g } td>}| }tD ]}t||kr(|r(| | q(| W  5 Q R  S Q R X d S )Nr   z/proc/cpuinfo)r@   r   ri   readsupported_vec_isa_listr   append)Zisa_listZ	_cpu_infoZ_cpu_info_contentisar    r    r$   valid_vec_isa_listj  s    

r0  c                  C   sP   t  } | stS tjjd kr*| s"t| d S | D ]}tjj| kr.|  S q.tS )Nr   )r0  invalid_vec_isar   r   Zsimdlenr   r  )Z_valid_vec_isa_listr/  r    r    r$   pick_vec_isax  s    
r2  Tc                 C   s   | rdS dS )Nz-shared -fPICr9   r    )sharedr    r    r$   
get_shared  s    r4  c                 C   s   | rdS dS )Nz-Wallr9   r    )r  r    r    r$   get_warning_all_flag  s    r5  c                   C   s   dS )Nz-std=c++17 -Wno-unused-variabler    r    r    r    r$   	cpp_flags  s    r6  c                   C   s   dS )Nz-DTORCH_INDUCTOR_CPP_WRAPPERr    r    r    r    r$   cpp_wrapper_flags  s    r7  c                  C   sV   d} t  r| S tjdkr$| d7 } nt dkr:| d7 } n| d7 } t  sR| d7 } | S )Nz%-O3 -ffast-math -fno-finite-math-onlydarwinz -Xclangppc64lez -mcpu=nativez -march=nativez	 -fopenmp)r   r   r@   r   machine)Z
base_flagsr    r    r$   optimization_flags  s    


r;  c                   C   s   dS )Nz$-D C10_USING_CUSTOM_GENERATED_MACROSr    r    r    r    r$   use_custom_generated_macros  s    r<  c                  C   s$   t  rt } d|  dS dS d S )Nz-Wp,-fopenmp z( -D C10_USE_GLOG -D C10_USE_MINIMAL_GLOGr9   )r   r   r   
openmp_lib)r=  r    r    r$   use_fb_internal_macros  s    r>  c                   C   s   t  rdS dS d S )Nz	-nostdincr9   )r   r   r    r    r    r$   use_standard_sys_dir_headers  s    r?  c                  C   sJ   z,d} t |  d}tt|dkW S  t jk
rD   Y dS X d S )Nzconda list llvm-openmp --jsonr   r   F)r   r   r  r   lenrY   loadsr   )commandoutputr    r    r$   is_conda_llvm_openmp_installed  s    rD  c                  C   s\   z>t ddg t dddgd } tj| }|| fW S  t jk
rV   Y dS X d S )Nr   Zbrewz--prefixlibompr   )Fr9   )r   r   r   stripr-   rD   ro   r   )libomp_pathomp_availabler    r    r$   homebrew_libomp  s    
rI  Fr  c                 C   s  t  r2dtjkr2dtjkr2tjt tjd< ddlm	} |rNt  rNd}d}t
jdkr| sx|tksx|sxt jjr||tdg }||td	g }g }t  s|d
ddg7 }|dg7 }|s|dg7 }n"|dg7 }|r|tjt g7 }| }|r^t  rT|tkrTt| }	d| d|	 d|	 d|	 dg}n
d| }|r*t  rz|dg7 }n|dddg7 }n||tdg }g }t
jdkrt  }
tdd k	r<tjtddd}tj|}|r(|tjtdd |tjtdd n
td |
p:|}
|
rFg ndg}|
stdd k	rt  }
|
rtjtdd}|tjtdd || t! j"dkrtjtj|d rd!g}|
s*t# \}
}|
r*|tj|d |tj|d nt  r$dgndg}t  r|t$  |t%  |t&  |t'  |t(  |t)  |t*  |t+  |t,  |d dd"d# |D }dd$d# |D }dd%d# |D }||||fS )&NZ	CUDA_HOMEZ	CUDA_PATHr   )cpp_extensionTr9   r   includeLIBDIRZc10r<   Z	torch_cpugompZtorch_pythonZomp z-D CPU_CAPABILITY=z-D CPU_CAPABILITY_z-D HAVE_Z_CPU_DEFINITIONz-Dr>   Zc10_cudaZ
torch_cudar8  Z
OMP_PREFIXzomp.hlibz-environment variable `OMP_PREFIX` is invalid.ZCONDA_PREFIXx86_64zlibiomp5.dylibZiomp5c                 S   s   g | ]}d | qS )z-Ir    r   pr    r    r$   
<listcomp>^  s     z1get_include_and_linking_paths.<locals>.<listcomp>c                 S   s   g | ]}d | qS )z-Lr    rR  r    r    r$   rT  _  s     c                 S   s   g | ]}d | qS )z-lr    rR  r    r    r$   rT  `  s     )-r   r   r-   r.   rD   dirnamer   r>   Ztorch.utilsrK  r@   r   r1  r   Zenable_kernel_profileZinclude_paths	sysconfigr   Zlibrary_pathsget_config_varcpp_prefix_pathr
  r   upperrE   r  r  r   ro   r.  warningswarnrD  unamer:  rI  ZsleefZopenmpZgcc_includeZlibgccZlibgcc_archZlibgcc_backwardglibcZlinux_kernelZgcc_install_tools_include)include_pytorchr  r>   aot_moderK  macrosipathslpathslibscaprH  header_pathZ	valid_envZconda_lib_pathrG  r    r    r$   get_include_and_linking_paths  s    	


	







rf  c              #   C   s   t ||||\}}	}
}t rr|r,| }|}ntj| }tj|}tjt t	 g}d
dd |D }n| }|}d}tdddt  d| dt| dt| dt  d| d|	 d|
 d| d| dt  dt  dt  dt  d| d S )	NrO  c                 S   s   g | ]}d | qS )z-Br    rR  r    r    r$   rT  z  s     z'cpp_compile_command.<locals>.<listcomp>r9   z[ \n]+z
            z
            -o z	
        )rf  r   r   r-   rD   r   rU  r   ldZ	glibc_librE   r   subr   r4  r5  r6  r;  r<  r>  r?  rF  )inputrC  r  r3  r^  r  r>   r_  ra  rb  rc  r`  Zinp_nameZout_nameZlinker_pathsr    r    r$   r  d  sh    
   r  c                   @   s4   e Zd Ze ZeejZedd Zedd Z	dS )CudaKernelParamCachec                 C   s,   t |ddtjd\}}||d< || j|< d S )Nr   )r   r   Z
cubin_path)r   r   aot_inductor_output_pathr_   )clsr|   paramsr   r;   rD   r    r    r$   r     s    
zCudaKernelParamCache.setc                 C   s   | j |d S r   )r_   r/   )rl  r|   r    r    r$   r/     s    zCudaKernelParamCache.getN)
rs   rt   ru   dictr_   rv   clearclassmethodr   r/   r    r    r    r$   rj    s   


rj  c                   @   s(   e Zd Ze ZeejZedd ZdS )AotCodeCachec                    sD  |rt nt }ttdd||jd}t|d|tjd\} jkr0ddl	m
} t }|tj|d td	}	|	 tj|d d
 }
tj|
stt||
||jd}tdd| zt| W n6 tjk
r } zt||j|W 5 d }~X Y nX ntd|
 |
 j< W 5 Q R X  fdd}|S )Nio)r  r>   r_  r   )r   r   r   r   r  r   z.so)ri  rC  r  r>   r_  zaot compilation command: %srO  z.aot_inductor dynamic library already exist: %sc                     s:   t jdkst j fdd tt jd D S )Nr   c                 s   s   | ]
}d V  qd S r   r    )r   rr  r    r    r$   	<genexpr>  s     z=AotCodeCache.compile.<locals>.wrapper_call.<locals>.<genexpr>r   )r@  Zgraph_outputsr   r_   range)r"   rl  graphr|   r    r$   wrapper_call  s    z*AotCodeCache.compile.<locals>.wrapper_call)r1  r2  r   r  r_  r   r   rk  r_   r   r   r   r-   rD   rE   r   splitextro   r  r  r   debugr   r   CalledProcessErrorr   CppCompileErrorrC  )rl  rw  source_coder>   picked_vec_isacpp_commandr  r   r   r   Z	output_socmdr!  rx  r    rv  r$   compile  sX        
	$ zAotCodeCache.compileN)	rs   rt   ru   rn  r_   rv   ro  rp  r  r    r    r    r$   rq    s   
rq  c               	   C   s<   t tjd } |  }| }t|d\}}W 5 Q R X |S )Nzcodegen/cpp_prefix.hh)r
   __file__rp   ri   r,  r   )rD   r   r   r;   filenamer    r    r$   rX    s    
rX  c                  C   s2   t  } t r"dtj|  dS d|  dS d S )Nz
#include "")rX  r   r   r-   rD   r   )r  r    r    r$   
cpp_prefix  s    r  r   c              
   C   sf  t  rtj| n| }zt  rt }tj|}tj|}tjtjj	j
d}t z}t|tj|| t| tj|| tj|d}	t||	 t|||}
tj|rt| t|
| W 5 Q R X ntj|tjd W np tjk
r` } zN|jd}d|kp"d|k}|rBtjdkrBd}||7 }t|||W 5 d }~X Y nX d S )NrL  )r  rP   z'omp.h' file not foundrE  r8  a  

OpenMP support not found. Please try one of the following solutions:
(1) Set the `CXX` environment variable to a compiler other than Apple clang++/g++ that has builtin OpenMP support;
(2) install OpenMP via conda: `conda install llvm-openmp`;
(3) install libomp via brew: `brew install libomp`;
(4) manually setup OpenMP and set the `OMP_PREFIX` environment variable to point to a path with `include/omp.h` under it.)r   r   r-   rD   r   rX  rE   r<   utilsrK  _TORCH_PATHr0   TemporaryDirectoryr   copycopytreer   ro   remover   r   STDOUTr{  rC  r   r@   r   r   r|  )r  r   r  Z
input_filere  header_nameZoutput_nameZtorch_includes_pathZtmp_dirZdest_include_pathZoutput_file_pathr!  rC  Zopenmp_problemZinstructionr    r    r$   r    s8     

	r  c                   @   s4   e Zd Ze ZeejZedd Zedd Z	dS )CppCodeCachec              
   C   s   zt | W S  tk
r } zjdt|krRtjdrRt dat |  W Y 8S dt|krt| dt	  dt	  d| W 5 d }~X Y nX d S )NrN  z/usr/lib64/libgomp.so.1z(failed to map segment from shared objectz3.  The most common reason this may occur is if the zl folder is mounted with noexec (e.g., by default Docker mounts tmp file systems as noexec).  Please remount zi with exec enabled, or set another temporary directory with TORCHINDUCTOR_CACHE_DIR environment variable.)
r   ZLoadLibraryOSErrorr   r-   rD   ro   Z_libgompr0   r1   )rD   r!  r    r    r$   _load_library,  s    
zCppCodeCache._load_libraryc              	   C   s   t  }ttdd|d}t|d|d\}}|| jkrddlm} t }|tj	
||d td	}|^ |d d
 d }	tj	|	stt||	|d}
t||	|
 | |	| j|< || j| _W 5 Q R X | j| S )Nrr  rs  rJ  r   r   r   r   r  r   r  r  )ri  rC  r  )r2  r   r  r   r_   r   r   r   r-   rD   rE   r   ro   r  r  r  r  r|   )rl  r}  r~  r  r|   r  r   r   r   r   r  r    r    r$   rj   ?  s*    
  zCppCodeCache.loadN)
rs   rt   ru   rn  r_   rv   ro  r  rp  rj   r    r    r    r$   r  (  s   

r  c                   @   sv   e Zd ZU e Zeeejf e	d< e Z
eejZedddZedddZeddd	Zeed
dd Zd
S )r   r_   r9   c                 C   s   t |d|dS Nr:   r   )r   )rl  r}  r   r    r    r$   r   ]  s    zPyCodeCache.writer    c                 C   s    t |d|d\}}| |||S r  )r   r   )rl  r}  r   linemapr|   rD   r    r    r$   rj   a  s    zPyCodeCache.loadc                 C   s   || j krt|}zt| |d}W nB tk
rj } z$td| dt|j d| W 5 d }~X Y nX t	t d| }||_
||_t||j|j |tj|j< | j || tt| | j|< W 5 Q R X | j | S )NexeczFailed to import 
z: r8   )r_   ri   r  r,  r  RuntimeErrortypers   types
ModuleTyper  r|   r  __dict__r@   modulesr   r   ziplinemaps)rl  r|   rD   r  r   r   r!  modr    r    r$   r   f  s     

zPyCodeCache.load_by_key_pathNc                 C   sV   || j krd S | j | \}}t||}|dkr2d S ||d  }|sFd S dd }||S )Nr   r   c                 S   s"   d}t || }dd t|D S )Nz"File "(.+)", line (\d+), in (.+)\nc                 S   s"   g | ]\}}}|t ||d qS ))r  linerF   )r   )r   r   lnr    r    r$   rT    s   zPPyCodeCache.stack_frames_for_code.<locals>.parse_stack_trace.<locals>.<listcomp>)r   findallreversed)Zstack_traceregexmatchesr    r    r$   parse_stack_trace  s
    z<PyCodeCache.stack_frames_for_code.<locals>.parse_stack_trace)r  r   )rl  rD   linenolinesZnodesrS  entryr  r    r    r$   stack_frames_for_code|  s    


z!PyCodeCache.stack_frames_for_code)r9   )r9   r    )r    )rs   rt   ru   rn  r_   r   r   r  r  r   r  rv   ro  rp  r   rj   r   rw   rx   r  r    r    r    r$   r   X  s   

r   c                   @   s(   e Zd Ze ZeejZedd ZdS )CppWrapperCodeCachec                 C   s  d| }t |}tj|s(t| d}tj|| d| }td| || jkrtd| ddl	m
}	 t }
|	tj|
|d td	}|< tj|shtd
| t }t }t }t }tt |d\}}}}t }t }| d| d| d| d| d| }| d| d| d}| }tjjj|||g|g|g|g|gdd}td| n`td| tj||}|d k	sttj|}t|jt j!st|j"| td| || j|< W 5 Q R X | j| S )NZinline_extension_r  r8   zCpp wrapper code path %szCpp wrapper cache miss for %sr   r   r  r   zCpp wrapper building %s)r  r>   rO  z                     z -ffast-mathT)rF   Zbuild_directoryZcpp_sourcesZ	functionsextra_cflagsextra_ldflagsextra_include_pathsZuse_pchzCpp wrapper done building %sz(Found target .so, cpp wrapper loading %szCpp wrapper done loading %s)#rI   r-   rD   ro   r4   rE   r   rz  r_   r   r   r   r   r6  r;  r4  r5  rf  r2  r<  r7  r<   r  rK  Zload_inline	importlibutilspec_from_file_locationr   module_from_specr   loaderr	   Loaderexec_module)rl  r}  	func_namer|   r>   rF   rH   extfilepathr   r   r   Z
_cpp_flagsZ
_opt_flagsZ_sharedZ_warning_all_flagZ_ipathsZ_lpathsZ_libsZ_macrosZ_use_custom_generated_macrosZ_cpp_wrapper_flagsr  r  r  r  specr    r    r$   rj     sd    


zCppWrapperCodeCache.loadN)	rs   rt   ru   rn  r_   rv   ro  rp  rj   r    r    r    r$   r    s   
r  c                   @   s   e Zd Zedd ZdS )TritonCodeCachec                 C   s   t |}t||S r   )r   rj   getattr)rl  kernel_namer}  r  r    r    r$   rj     s    
zTritonCodeCache.loadN)rs   rt   ru   rp  rj   r    r    r    r$   r    s   r  c                 C   s&   t | t| |}|j|d d S )N)Zwarm_cache_only_with_cc)r   Z"set_compiler_worker_current_devicer  rj   
precompile)r  r}  ccrM   kernelr    r    r$   _worker_compile  s    
r  c                 C   s   t | |}|  |S r   )r  rj   r  )r  r}  r  r    r    r$   _load_kernel  s    r  c                   @   s   e Zd Zdd Zdd ZdS )TritonFuturec                 C   s   || _ || _|| _d S r   )r  r}  future)re   r  r}  r  r    r    r$   rf     s    zTritonFuture.__init__c                 C   sv   t  }t| dr| jS | j  t| j| j }| _t  | }|dkrftd| d| j  t| j | `| `| `|S )Nr  2   z"Detected long compilation time of z seconds for kernel name )	r   hasattrr  r  resultr  r  r}  r   )re   t0r  Zlatencyr    r    r$   r    s    



zTritonFuture.resultN)rs   rt   ru   rf   r  r    r    r    r$   r    s   r  c                   @   s   e Zd Zdd Zeeddd Zeeddd Ze	dd	 Z
e	d
d Ze	dd Zdd Zdd Zeeef dddZdS )AsyncCompilec                 C   s   d S r   r    rd   r    r    r$   rf   
  s    zAsyncCompile.__init__r   c                   C   s   t jdksttt jS Nr   )r   compile_threadsr   r   r    r    r    r$   pool  s    zAsyncCompile.poolc                     s^   t   tjdkstt   fdd} td}t	tj|| d}tj
jd |jtjd |S )Nr   c                     s$    fdd} t | ddat  d S )Nc                      s,   t d  t kr tt tj q d S r  )r   r-   getppidkillr   signalSIGKILLr    Z	orig_ppidr    r$   run#  s    z4AsyncCompile.process_pool.<locals>.init.<locals>.runT)targetdaemon)r   Z_watchdog_threadstart)r  r  r    r$   init"  s    z'AsyncCompile.process_pool.<locals>.initfork)Z
mp_contextZinitializer)Zexitpriority)r   Z_propertiesr   r  r   r-   r   multiprocessingZget_contextr   r  ZFinalizeshutdownr@   maxsize)r  Zfork_contextr  r    r  r$   process_pool  s    
  zAsyncCompile.process_poolc                 C   sZ   t jdkrd S t  |  }t|dr0|  n tt jD ]}|  q:|  t	  d S )Nr   _start_queue_management_thread)
r   r  r'   r  r  r  ru  Z_adjust_process_countZ_start_executor_manager_threadr*   )rl  r  r;   r    r    r$   	warm_pool:  s    



zAsyncCompile.warm_poolc                 C   s   t jdkr| S |  |S r  )r   r  r  submit)rl  taskr    r    r$   r  W  s    
zAsyncCompile.submitc                    sB   t jdkst|dkr$tt|S dd  fdd|D D S )Nr   c                 S   s   g | ]}|  qS r    )r  )r   tr    r    r$   rT  a  s     z$AsyncCompile.map.<locals>.<listcomp>c                    s   g | ]}   |qS r    )r  r  )r   xrl  fnr    r$   rT  a  s     )r   r  r@  r   map)rl  r  seqr    r  r$   r  ]  s    zAsyncCompile.mapc                 C   sd   t   tjdkrVtj \}}tj }|d | }|  t	||||}t
|||S t||S d S )Nr   
   )r'   r   r  r<   r>   Zget_device_capabilityrT   r  r  r  r  r  )re   r  r}  rB   rC   rM   r  r  r    r    r$   rK   c  s    

    zAsyncCompile.tritonc                    s    fdd}|  |S )Nc                      s   t  jS r   )r  rj   r  r    r}  r    r$   r  r  s    zAsyncCompile.cpp.<locals>.task)r  )re   r}  r  r    r  r$   r   q  s    zAsyncCompile.cpp)scopec                 C   s   t dd | D }t|dtjdd}tjdkr| D ]F\}}tjr\t|ts\|	| t|t
tfr:| ||< |d q:t  d S )Nc                 S   s"   g | ]\}}t |ttfr|qS r    )r   r   r  )r   r|   r~   r    r    r$   rT  y  s   z%AsyncCompile.wait.<locals>.<listcomp>zInductor Compilationr   )totaldescdisabledelayr   )r@  itemsr   r   Zdisable_progressr  Zverbose_progressr   r   Zset_postfix_strr   r  r  updater*   )re   r  Znum_kernelsZpbarr|   r  r    r    r$   waitw  s&    

zAsyncCompile.waitN)rs   rt   ru   rf   rv   rw   rx   r  r  rp  r  r  r  rK   r   r   r   r   r  r    r    r    r$   r  	  s    %


r  )r9   )r9   )r9   r   )r9   r   r9   )T)T)r   dataclassesrw   r2   rW   r  rY   loggingr  r-   r   r   r   r  r   r  r   r@   rV  r0   r   r  rZ  r   bisectr   concurrent.futuresr   r   r   ctypesr   r   r   r	   r
   r   r   r   typingr   r   r   r   r   r   r<   Ztorch._inductorr   r   r   Ztorch._inductor.utilsr   Z	torch.hubr   r   rD   abspathr  Z_HERErU  r  r   Z	triton.fbr   Ztriton.fb.buildr   Ztorch._inductor.fb.utilsr   r   r   r   r(   r&   r'   r*   	getLoggerrs   r   rx   r5   rI   rJ   ry   r   r   r   r   r   r   r   r   rq   	dataclassr   r   r   r   r   r   r  r  r#  r(  r+  r1  r-  r0  r2  r4  r5  r6  r7  r;  r<  r>  r?  rD  rI  rf  r  rj  rq  rX  r  r  r  r   r  r  r  r  r  r  r  r    r    r    r$   <module>   s   
	

GM   



\



	
    +@
-0?G 