U
    9%ez                     @   s   d dl Z ddlmZmZ ddlmZ ddlmZ dd Zeddd	 ied
dd	 ieejdddZ	eddd	 ied
dd	 ieejdddZ
G dd de jjZejZdS )    N   )
heuristicsjit)languagenext_power_of_2c                 C   s   | dk rdS | dk rdS dS )Ni      i           )Nr   r   W/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/triton/ops/cross_entropy.py	num_warps   s
    r   c                 C   s   t | d S Nr   r   nargsr   r   r   <lambda>       r   BLOCKc                 C   s   t | d S r   r   r   r   r   r   r      r   )r   c                 C   s   t d}t d|}t || }| ||  | } |||  | }	|||  | }
t j| ||k td d}|t j}|t |d }t t 	t 
|d| }t j|	|||k d t   t |
}t || | d S Nr   inf)maskother)r   )tl
program_idarangeloadfloattofloat32maxlogsumexpstoreZdebug_barrier)ZLOGITSPROBSIDXZLOSSr   r   rowcolsidxZ
WRIT_PROBSZ
READ_PROBSlogitsprobsr   r   r   _forward   s    

r-   c                 C   s   t | d S r   r   r   r   r   r   r   )   r   c                 C   s   t | d S r   r   r   r   r   r   r   *   r   c                 C   s   t d}t d|}t || }| ||  | } t j| ||k tdd }t |t j}||k}	t || }
||	 |
 }t j| || j	j
||k d d S r   )r   r   r   r   r   r$   r   r    r%   dtypeZ
element_ty)r&   r'   ZDPROBSr   r   r(   r)   r*   r,   deltaZdoutZdinr   r   r   	_backward)   s    
r0   c                   @   s$   e Zd Zedd Zedd ZdS )_cross_entropyc           	         s~   |j tjkstd j j  }} jd tj|||d}tj ||d} fdd}t|  ||| ||| |S )Nz(Indices are expected to be of type long.)r.   devicec                    s       fS NZnumeloptr+   n_colsr   r   r   H   r   z(_cross_entropy.forward.<locals>.<lambda>)	r.   torchZint64AssertionErrorr3   shapeZ
empty_liker-   Zsave_for_backward)	clsctxr+   indicesr3   r.   resultneg_logprobsgridr   r8   r   forward>   s    
z_cross_entropy.forwardc                    s<   |j \}jd   fdd}t| ||  dfS )a  We know d(-log(p[i])/dlogit[k] = -id_mat[i,k] + p[k]
        so we initialize the gradient as neg_logprobs, so we can just exponentiate
        to get p[k], which is most of what we need...  neg_logprobs will be
        modified in place to become the gradient we want
        r2   c                    s       fS r4   r5   r6   r9   rA   r   r   r   Z   r   z)_cross_entropy.backward.<locals>.<lambda>N)Zsaved_tensorsr<   r0   )r=   r>   Zdneg_logprobsr?   rB   r   rD   r   backwardN   s
    

z_cross_entropy.backwardN)__name__
__module____qualname__classmethodrC   rE   r   r   r   r   r1   =   s   
r1   )r:    r   r   r   r   r   r   Z	constexprr-   r0   ZautogradFunctionr1   applyZcross_entropyr   r   r   r   <module>   s   "