U
    9%e                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d	d
lmZ d	dlmZ e eZdd ZG dd deZdd Zdd Zdd ZeeedZeded dddZdS )    N)defaultdict)Set)GraphModule)partition_cudagraphs)StorageWeakRef)Module)tree_map   )aot_autograd)register_backendc                 C   s   t | tjr|  S | S d S N)
isinstancetorchZTensorclone)t r   `/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/torch/_dynamo/backends/cudagraphs.pycloner   s    r   c                       sJ   e Zd ZU eed< ee ed<  fddZdZdZ	dZ
dZdd Z  ZS )	CudaGraphModulegmmutated_inputsc                    s   t    || _|| _d S r   )super__init__r   r   )selfr   r   	__class__r   r   r      s    
zCudaGraphModule.__init__FNc              	   G   sP  | j d k	rvt|t| jks tt| j|D ]\}}|| q,| j   | jD ]}|| | j|  qPtt	| j
S | jrdd |D | _tj | _ tj | j  | j| j | _
W 5 Q R X | j   | jD ]}|| | j|  qtt	| j
S tj }|tj  tj| | j| }W 5 Q R X tj | d| _|S d S )Nc                 S   s   g | ]}|  qS r   r   .0xr   r   r   
<listcomp><   s     z,CudaGraphModule.__call__.<locals>.<listcomp>T)graphlenstatic_inputsAssertionErrorzipcopy_replayr   r   r   static_outputs	warmed_upr   cuda	CUDAGraphr   Streamwait_streamcurrent_streamstream)r   argsdstsrcir/   rr   r   r   __call__,   s0    





zCudaGraphModule.__call__)__name__
__module____qualname__r   __annotations__r   intr   r)   r!   r#   r(   r5   __classcell__r   r   r   r   r      s   
r   c                 C   s   dd }t t}d}t }| jD ]}|jdkrT|t||j  | |d7 }q |jdkr |jt	j
krlq |jj}t|jD ]p\}}|t|jk r|j| }	n|j|jkrq~|j|j }	d}
|jr|jjrd}
|
r~||t||	j  O }q~q |S )	Nc                 S   s   d| kr| d S | d S )NvalZfake_resultr   )metar   r   r   meta_fkW   s    z%find_input_mutations.<locals>.meta_fkr   placeholderr	   Zcall_functionFT)r   setnodesopr   r=   Z_typed_storageaddtargetoperatorgetitemZ_schema	enumerate	argumentsr"   r0   namekwargsZ
alias_infoZis_write)gr>   inputsZ	input_idxr   nZschemar3   argargumentZmut_argr   r   r   find_input_mutationsV   s6    




rP   c                 C   s\   | j jD ]N}|jdkr|jr t| |j}| |j t|j }| 	|jt
|| qd S )NZcall_module)r!   rA   rB   rJ   r$   Zget_submodulerD   Zdelete_submodulerP   Zadd_submoduler   )r   rM   Zsubmodr   r   r   r   apply_cuda_graphs{   s    


rQ   c                 C   s   t | |} t|  | S r   )r   rQ   )modelrL   r   r   r   
cudagraphs   s    
rS   )Zfw_compilerZbw_compiler)rI   Zcompiler_fnTc              	      s   t |ttfst r&dd |D nt|tj  tj }|tj	  tj
| | |  W 5 Q R X |  tj	 | tj  tj tjj|d |  W 5 Q R X t ttfsֈf fdd}|S )zBThis isn't registered as a backend, but is used in some benchmarksc                 S   s   g | ]}t |qS r   )r   Z
zeros_liker   r   r   r   r       s     z$cudagraphs_inner.<locals>.<listcomp>)r/   c                     sX   t t | kst r6t| D ]\}}|| q"  rPdd D S S d S )Nc                 S   s   g | ]}|  qS r   r   r   r   r   r   r       s     z1cudagraphs_inner.<locals>.run.<locals>.<listcomp>)r"   r$   r%   r&   r'   )Z
new_inputsr1   r2   copy_inputscopy_outputsr!   r#   r(   r   r   run   s    zcudagraphs_inner.<locals>.run)r   listtupler$   r   r*   Zsynchronizer,   r-   r.   r/   r+   r!   )rR   rL   rV   rU   r/   rW   r   rT   r   cudagraphs_inner   s&    



rZ   )TT)loggingrE   collectionsr   typingr   r   Ztorch.fxr   Z#torch.fx.passes.backends.cudagraphsr   Z torch.multiprocessing.reductionsr   Ztorch.nnr   Ztorch.utils._pytreer   commonr
   registryr   	getLoggerr6   logr   r   rP   rQ   rS   Zaot_cudagraphsrZ   r   r   r   r   <module>   s(   
<%