U
    9%e:                     @   s  d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZ dgZe jjZd	d
 ZddedddZd5edddZd6edddZd7edddZd8ee ee ee eedddZddedddZdd Z edddZ!dd  Z"ddedd!d"Z#d#d$ Z$ddedd%d&Z%ej&eej'eej(eej)eej*eej+eej,e!ej-e#ej.e#ej/e%ej0e%iZ1d'd( Z2d)d*d+d,d-gZ3d.d/ Z4d0d1 Z5G d2d deZ6G d3d4 d4eZ7dS )9    N)tree_map)ListAnyDictOptionalUnion
NamedTuple)defaultdict)TorchDispatchMode)RemovableHandleprodFlopCounterModec                 C   s   t | tjr| jS | S N)
isinstancetorchTensorshape)i r   W/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/torch/utils/flop_counter.py	get_shape   s    r   )	out_shapereturnc          	      O   s,   | \}}|\}}||kst || d | S )z!
    Count flops for matmul.
       AssertionError)	a_shapeb_shaper   argskwargsmkk2nr   r   r   mm_flop   s    r&   c                 K   s
   t ||S )z
    Count flops for addmm
    )r&   Z
self_shaper   r   r   r!   r   r   r   
addmm_flop   s    r(   c                 K   sD   | \}}}|\}}}	||ks t ||ks,t || |	 d | }
|
S )z,
    Count flops for the bmm operation.
    r   r   )r   r   r   r!   br"   r#   b2r$   r%   flopr   r   r   bmm_flop%   s    

r,   c                 K   s
   t ||S )z0
    Count flops for the baddbmm operation.
    )r,   r'   r   r   r   baddbmm_flop3   s    r-   F)x_shapew_shaper   
transposedr   c           
      C   sJ   | d }|r| n|dd }|^}}}|t | | t | d | }	|	S )a  
    Count flops for convolution. Note only multiplication is
    counted. Computation for bias are ignored.
    Flops for a transposed convolution are calculated as
    flops = (x_shape[2:] * prod(w_shape) * batch_size).
    Args:
        x_shape (list(int)): The input shape before convolution.
        w_shape (list(int)): The filter shape.
        out_shape (list(int)): The output shape after convolution.
        transposed (bool): is the convolution transposed
    Returns:
        int: the number of flops
    r   r   Nr   )
r.   r/   r   r0   Z
batch_sizeZ
conv_shapeZc_outZc_inZdimsr+   r   r   r   conv_flop_count<   s
    
 r1   c          
      O   s   t | |||dS )z&
    Count flops for convolution.
    )r0   )r1   )
r.   r/   _bias_stride_padding	_dilationr0   r   r    r!   r   r   r   	conv_flopX   s    r6   c                 C   s    | d | d gt | dd   S )N   r   r   )list)r   r   r   r   transpose_shape^   s    r9   c                 C   sZ   d}|
d r,t |d }|t| ||| 7 }|
d rVt |d }|tt|| ||7 }|S Nr   r7   )r   r1   r9   )grad_out_shaper.   r/   r2   r3   r4   r5   r0   Z_output_paddingZ_groupsZoutput_maskr   
flop_countZgrad_input_shapeZgrad_weight_shaper   r   r   conv_backward_flopa   s    r=   c                 C   s   | \}}}}|\}}}	}
|\}}}}||  kr8|krln n0||  krP|krln n||
krl|	|krl||
kspt d}|t|| ||f|| ||	f7 }|t|| ||	f|| |	|f7 }|S )z]
    Count flops for self-attention.
    NB: We can assume that value_shape == key_shape
    r   r   r,   )query_shape	key_shapevalue_shaper)   hs_qd_q_b2_h2s_k_d2_b3_h3_s3d_vtotal_flopsr   r   r   sdpa_flop_county   s    L""rN   c                O   s   t | ||S )z)
    Count flops for self-attention.
    )rN   )r?   r@   rA   r   r    r!   r   r   r   	sdpa_flop   s    rO   c                 C   sR  d}|\}}}}|\}	}
}}|\}}}}| \}}}}||	  krR|  krR|krn n*||
  krt|  krt|krn n||kst ||kr||kr||kst d}|t|| ||f|| ||f7 }|t|| ||f|| ||f7 }|t|| ||f|| ||f7 }|t|| ||f|| ||f7 }|t|| ||f|| ||f7 }|S )Nr   r>   )r;   r?   r@   rA   rM   r)   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   Z_b4Z_h4Z_s4Z_d4r   r   r   sdpa_backward_flop_count   s    P"""""rP   c                O   s   t | |||S )z2
    Count flops for self-attention backward.
    )rP   )r;   r?   r@   rA   r   r    r!   r   r   r   sdpa_backward_flop   s    rQ   c                 C   s   t | ts| fS | S r   )r   tuplexr   r   r   normalize_tuple   s    
rU    KMBTc                 C   s0   t dtttd tt| d d }t| S )Nr   r7      )maxminlensuffixesstr)numberindexr   r   r   get_suffix_str   s    (rc   c                 C   s&   t |}| d|  d}|t |  S )Ni  z.3f)r_   rb   )ra   suffixrb   valuer   r   r   convert_num_with_suffix   s    
rf   c                       s   e Zd ZdZd"eeejje	ejj f  e
eeeeef  dddZdd	 Zd
d Zdd Zdd Zdd Zdd Ze
dddZeeeee
f f dddZd#ddZ fddZ fddZd$d d!Z  ZS )%r   a  
    ``FlopCounterMode`` is a context manager that counts the number of
    flops within its context. It does this using a ``TorchDispatchMode``.

    It also supports hierarchical output by passing a module (or list of modules) to FlopCounterMode on construction.

    Example usage

    .. code-block:: python

        mod = ...
        flop_counter = FlopCounterMode(mod)
        with flop_counter:
            mod.sum().backward()

    Nr   T)modsdepthdisplaycustom_mappingc                 C   s\   t dd | _|| _dg| _|| _|d kr.i }t|tjjrB|g}|| _	i | _
t|| _d S )Nc                   S   s   t tS r   )r	   intr   r   r   r   <lambda>       z*FlopCounterMode.__init__.<locals>.<lambda>Global)r	   flop_countsrh   parentsri   r   r   nnModulerg   _module_to_forward_hook_handlesflop_mapping)selfrg   rh   ri   rj   r   r   r   __init__   s    zFlopCounterMode.__init__c                 C   s   | j d krd S | j D ]t}t|j}t|  D ]T\}}|dkrH|}nd||g}|| |}|	| 
|}t||| j|< q2qd S )NrV   .)rg   type__name__dictZnamed_modulesitemsjoinZregister_forward_pre_hook_enter_moduleZregister_forward_hook_exit_module_ForwardHookHandlesrs   )ru   modprefixnamemoduleforward_pre_hook_handleforward_hook_handler   r   r   _register_forward_hooks   s    


 z'FlopCounterMode._register_forward_hooksc                 C   s6   | j  D ]}|d   |d   q
| j   d S r:   )rs   valuesremoveclear)ru   Zforward_hook_handlesr   r   r   _deregister_forward_hooks  s    z)FlopCounterMode._deregister_forward_hooksc                    s    fdd}|S )Nc                    s   t |} | }|S r   )rU   _create_pre_module)r   inputsoutr   ru   r   r   f  s    z(FlopCounterMode._enter_module.<locals>.fr   ru   r   r   r   r   r   r}     s    zFlopCounterMode._enter_modulec                    s    fdd}|S )Nc                    s   t |} | S r   )rU   _create_post_module)r   r   outputsr   r   r   r     s    z'FlopCounterMode._exit_module.<locals>.fr   r   r   r   r   r~     s    zFlopCounterMode._exit_modulec                    s    G  fdddt jj}|jS )Nc                       s0   e Zd Ze fddZe fddZdS )z6FlopCounterMode._create_post_module.<locals>.PushStatec                    sB   j d  kstj   tdd |}t|dkr>|d S |S )Nc                 S   s   t | tjr|  S | S r   r   r   r   clonerS   r   r   r   rl   '  rm   zPFlopCounterMode._create_post_module.<locals>.PushState.forward.<locals>.<lambda>r7   r   )rp   r   popr   r^   ctxr    r   r   r   forward#  s    
z>FlopCounterMode._create_post_module.<locals>.PushState.forwardc                    s   j   |S r   )rp   appendr   Z	grad_outsr   r   r   backward,  s    z?FlopCounterMode._create_post_module.<locals>.PushState.backwardNry   
__module____qualname__staticmethodr   r   r   r   r   r   	PushState"  s   r   r   ZautogradFunctionapply)ru   r   r   r   r   r   r   !  s    z#FlopCounterMode._create_post_modulec                    s    G  fdddt jj}|jS )Nc                       s0   e Zd Ze fddZe fddZdS )z4FlopCounterMode._create_pre_module.<locals>.PopStatec                    s2   j   tdd |}t|dkr.|d S |S )Nc                 S   s   t | tjr|  S | S r   r   rS   r   r   r   rl   8  rm   zNFlopCounterMode._create_pre_module.<locals>.PopState.forward.<locals>.<lambda>r7   r   )rp   r   r   r^   r   r   r   r   r   5  s
    z<FlopCounterMode._create_pre_module.<locals>.PopState.forwardc                    s    j d  kstj   |S )Nr   )rp   r   r   r   r   r   r   r   =  s    
z=FlopCounterMode._create_pre_module.<locals>.PopState.backwardNr   r   r   r   r   PopState4  s   r   r   )ru   r   r   r   r   r   r   3  s    z"FlopCounterMode._create_pre_moduler   c                 C   s   t | jd  S )Nrn   )sumro   r   ru   r   r   r   get_total_flopsE  s    zFlopCounterMode.get_total_flopsc                 C   s
   t | jS )a  Returns the flop counts as a dictionary of dictionaries. The outer
        dictionary is keyed by module name, and the inner dictionary is keyed by
        operation name.

        Returns:
            Dict[str, Dict[Any, int]]: The flop counts as a dictionary.
        )rz   ro   r   r   r   r   get_flop_countsH  s    zFlopCounterMode.get_flop_countsc                    s   |d krj }|d krd}dd l}d|_dddg}g }  t d fdd	}j D ]H}|d
krtqf|dd }||krqf|||d }|D ]}	||	 qqfd
jkrst	|D ] \}
}	d||
 d  ||
 d< q|d
d| }t
|dkrd
ddgg}|j||ddS )Ni?B r   Trr   ZFLOPz% TotalFc                    s   t j|   }| kO d| }g }|||  t||  d ddg j|   D ]:\}}||d t| t||  d ddg qb|S )N d   z.2f%z - )r   ro   r   r   rf   r{   r`   )mod_namerh   rM   paddingr   r#   vZglobal_flopsZglobal_suffixZis_global_subsumedru   r   r   process_mod`  s     z.FlopCounterMode.get_table.<locals>.process_modrn   rw   r7   r   0z0%)leftrightr   )headersZcolalign)rh   tabulateZPRESERVE_WHITESPACEr   rc   ro   keyscountr   	enumerater^   )ru   rh   r   headerr   r   r   Z	mod_depthZ
cur_valuesre   idxr   r   r   	get_tableR  s8    
zFlopCounterMode.get_tablec                    s    | j   |   t   | S r   )ro   r   r   super	__enter__r   	__class__r   r   r     s    

zFlopCounterMode.__enter__c                    s.   | j rt| | j |   t j|  d S r   )ri   printr   rh   r   r   __exit__)ru   r    r   r   r   r     s    zFlopCounterMode.__exit__r   c                 C   s~   |r|ni }|||}|j }|| jkrz| j| }tt|||f\}}}|||d|i}	| jD ]}
| j|
 |  |	7  < q^|S )Nr   )Z_overloadpacketrt   r   r   rp   ro   )ru   functypesr    r!   r   Zfunc_packetZflop_count_funcr   r<   parr   r   r   __torch_dispatch__  s    



z"FlopCounterMode.__torch_dispatch__)Nr   TN)N)r   N)ry   r   r   __doc__r   r   r   rq   rr   r   rk   boolr   r   rv   r   r   r}   r~   r   r   r   r`   r   r   r   r   r   __classcell__r   r   r   r   r      s.       

=c                   @   s   e Zd ZU eed< eed< dS )r   r   r   N)ry   r   r   r   __annotations__r   r   r   r   r     s   
r   )N)N)N)F)8r   Ztorch.nnrq   Ztorch.utils._pytreer   typingr   r   r   r   r   r   collectionsr	   Ztorch.utils._python_dispatchr
   Ztorch.utils.hooksr   mathr   __all__ZopsZatenr   rk   r&   r(   r,   r-   r   r1   r6   r9   r=   rN   rO   rP   rQ   mmZaddmmZbmmZbaddbmmZconvolutionZ_convolutionZconvolution_backwardZ'_scaled_dot_product_efficient_attentionZ#_scaled_dot_product_flash_attentionZ0_scaled_dot_product_efficient_attention_backwardZ,_scaled_dot_product_flash_attention_backwardrt   rU   r_   rc   rf   r   r   r   r   r   r   <module>   sx                 P