U
    Ç-eÿ<  ã                   @   sÔ   d dl Z ddlmZmZmZ ddlmZ eddd„ iƒeejejejejejdœd	d
„ƒƒZddd„Z	dd„ Z
eejejejejejdœdd„ƒZddd„Zdd„ Zddd„ZG dd„ de jjƒZG dd„ dƒZdS )é    Né   )ÚcdivÚ
heuristicsÚjit)ÚlanguageÚEVEN_Kc                 C   s   | d | d  dkS )NÚKÚTILE_Kr   © )Únargsr
   r
   ú^/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/triton/ops/blocksparse/matmul.pyÚ<lambda>   ó    r   )ÚTILE_MÚTILE_Nr	   ÚBLOCKr   c           *      C   s@  t  d¡| }||d 7 }t  d¡}t  |d ¡}t  |d ¡}|| t  d|¡|  }t  d|¡}| ||  ||  |d d …d f |  |d d d …f |  }t  |d ¡}|| t  d|¡|  }t  d|¡} |||  ||  |d d d …f |
  | d d …d f |	  }!t j||ft jd}"t|d| ƒD ]}#|rPt  |¡}$t  |!¡}%n@t j||d d d …f |#k dd}$t j|!| d d …d f |#k dd}%|"t j|$|%t jd7 }"||| 7 }|!||	 7 }!q0|" |j	j
¡}&t  d|¡| }'t  d|¡| }(|||  ||  |'d d …d f |  |(d d d …f |  })t j|)|&d	d
 d S )Nr   r   é   é   ©Údtypeg        )ÚmaskÚother©Z	out_dtypeT©r   )ÚtlÚ
program_idÚloadÚarangeÚzerosÚfloat32ÚrangeÚdotÚtor   Ú
element_tyÚstore)*ÚAÚBÚCZ	stride_zaÚ	stride_haZ	stride_maÚ	stride_akÚ	stride_zbÚ	stride_hbÚ	stride_bkZ	stride_nbÚ	stride_zcÚ	stride_hcZ	stride_mcZ	stride_ncr   Zgrid_offsetÚlutr   r   r	   r   r   Úblock_idZoff_zÚoff_hZstart_amÚoffs_amÚoffs_akZa_ptrsZstart_bnÚoffs_bnÚoffs_bkZb_ptrsÚaccÚkÚaÚbÚcÚoffs_cmÚoffs_cnÚpcr
   r
   r   Ú_sdd_kernel   sf    
ÿþýüÿþýü
  ÿþýür>   c
                 C   sÐ  |   d¡dkr$|   d¡dkr$|  ¡ } |  d¡dkrH|  d¡dkrH| ¡ }|rd||  } }| |  }}|rldnd}
|rxdnd}| j|
 |j|  }}||kr°td|› d|› dƒ‚|	d krâtj| jd	 |jd	 ||f| j| jd
}n(|	j| jd	 |jd	 ||fkst‚|	}|jd d|jd	 g}t	| | |||   d	¡|   d¡|   |rJdnd¡|   |r\dnd¡|  d	¡|  d¡|  |r~dnd¡|  |rdnd¡|  d	¡|  d¡|  d¡|  d¡|d	|||d|ddd |S )Nr   r   r   éþÿÿÿéÿÿÿÿzInner dimension mismatch (A: z vs B: ú)r   ©r   Údeviceé    é   )r   r   r	   r   Ú
num_stagesÚ	num_warps)
ÚstrideÚ
contiguousÚshapeÚ
ValueErrorÚtorchÚemptyr   rC   ÚAssertionErrorr>   )r8   r9   Útrans_aÚtrans_bÚtrans_cÚspdimsÚblockr/   ÚwidthsÚoutZa_dimZb_dimZKaZKbr:   Úgridr
   r
   r   Ú
sdd_matmulT   sX    
*$                 ù	rW   c                 C   s&   | j dd |¡ ¡ }| ¡ }|d fS )NF©Úas_tuple)Únonzeror"   ÚintrI   )ÚlayoutrS   rC   r/   r
   r
   r   Úsdd_lutv   s    r]   )r   r   r	   ÚGROUP_SIZE_Mr   c           4      C   sþ  t  d¡}t  d¡}t  d¡}t  d¡}t  |||||¡\}}t  d¡}||d  }t  |d ¡}t  |d ¡}t  |d ¡}t  |d ¡} || }!t  |!d ¡}"t  |"d¡}"t  d|¡}#t  d|¡}$| ||  |"|  |#d d …d f |  |$d d d …f |  }%|| t  d|¡ }&t  t  |&| |¡|¡}&t  |!¡}'t  |'d¡}'|'t  d|¡ }(|||  | |  |&d d d …f |
  |(d d …d f |	  })t j||ft j	d}*|!d7 }!t  |!d ¡}+t  |+d¡}+t  |!¡},t  |,d¡},t
|d| ƒD ]|}-t  |%¡}.t  |)¡}/|*t j|.|/t j	d7 }*|%|+7 }%|)|,|	 7 })|!d7 }!t  |!d ¡}+t  |+d¡}+t  |!¡},t  |,d¡},qê|* |jj¡}0|| t  d|¡ }1|| t  d|¡ }2|| |  ||  |1d d …d f |  |2d d d …f |  }3t j|3|0|2d d d …f |k d	 d S )
Nr   r   r   rE   r   é   r   r   r   )r   r   Znum_programsZ	swizzle2dr   Zmultiple_ofr   Zmax_contiguousr   r   r    r!   r"   r   r#   r$   )4r%   r&   r'   Z	stride_azr(   Z	stride_amr)   r*   r+   r,   Z	stride_bnr-   r.   Z	stride_cmZ	stride_cnZDS0ZDS1r/   r   r   r	   r^   r   Zpid_mZpid_nZ	num_pid_mZ	num_pid_nZpidzÚheaderÚoffsetr   Úcolumnr1   Zpincr0   r2   r3   Úpar4   Zstart_bkr5   Zpbr6   Zinc_aZinc_br7   r8   r9   r:   r;   r<   r=   r
   r
   r   Ú_dsd_kernel‚   s‚    





ÿþý

ÿþý



ÿþýürd   c
                    sÆ  |   d¡dkr$|   d¡dkr$|  ¡ } |  d¡dkrH|  d¡dkrH| ¡ }|||rTdnd  }
| d¡‰ | d¡}| |r|dnd¡‰| j}ˆ }|}|r˜ˆn|
}|r¤|
nˆ}|	d krÌtj||||f|| jd}n|	j||||fksât‚|	}d}‡ ‡‡fdd„}t	| | |||   d¡|   d¡|   |r$dnd¡|   |r6dnd¡|  d¡|  d¡|  |rXdnd¡|  |rjdnd¡|  d¡|  d¡|  |rŒdnd¡|  |rždnd¡ˆ|
|||t
|d	ƒ|d
d
d
d |S )Nr   r   r   r   rB   é€   c                    s   t ˆ| d ƒˆˆ gS )Nr   )r   )Úmeta©ÚBS0ZBS3Úwidthr
   r   r   æ   r   zdsd_matmul.<locals>.<lambda>rD   rE   )r   r   r	   r   rF   rG   r^   )rH   rI   Úsizer   rL   rM   rC   rJ   rN   rd   Úmin)r8   r9   rO   rP   rQ   rR   rS   r/   ri   rU   ZAS1ÚBS1r   ZCS0ZCS1ZCS2ZCS3r:   r   rV   r
   rg   r   Ú
dsd_matmulÎ   s^    

                  ù
rm   c                 C   s¢  t  | |rdnd¡}t  |¡jdd\}}| ¡ }|| }|rL| jdd}	n|  dd¡jdd}	|	 d¡}
t  |¡}t j|dd… dd	|dd…< t  	||
d t  |¡ ¡}|	dd…df | }| 
¡ }|dd…  |dd… 8  < || }| dd¡ d|¡}||dd…dd…f< |dd…df  |d | 8  < |||dk  |||dk df< | d¡}|rvt j|
| jd
}nšt jg t j| jd}d}t|  d¡ƒD ]r}| |dd…dd…f  
¡  ¡ }| ¡ }dt j|| jd
 ||dk< t  |||j|jdk  d f¡}||7 }qœ|| | }|dd…  |dd… | | 8  < | dd¡ d|¡}|r||dd…dd…f< |dd…df  |d | 8  < n<|| |dd…dd…f< |dd…df  |d | | 8  < |||dk  |||dk df< | d¡}| d¡}|d | d|  }|| }t j||||fdd	 d¡ ¡ }t j||fdd	 d¡ ¡ }t jd|j|jd}t  ||f¡}t  ||f¡}| t j¡ |¡}||fS )a  
    Generates the look-up table for incrementing pointers in the DSD/DDS matmul.
    Example (BLOCK=32, STEP=16)
    [[1, 0, 0, 1, 0],
     [0, 1, 1, 0, 1],
     [1, 0, 1, 0, 0]]

    Then the offsets for A are
     [0 , 16, 32, 48] <- row 0
      \----/  \----/
      col=0   col=3
     [64, 80, 96, 112, 128, 144] <- row 1
      \----/   \----/  \------/
       col=1    col=2    col=3
     [160, 176, 192, 208]
    which leads to increments table
    [0, 16, 16, 16, || 64, 16, 16, 16, 16, 16, || 160, 16, 16, 16]

    Because B is dense, the offsets are
    [0, 16, 96, 112] <- row 0
    [32, 48, 64, 80]  <- row 1
    [0, 16, 64, 80]   <- row 2
    r   r   TrX   Fr   Nr@   )Údim)rC   rB   rE   é   )rC   r   )rL   ÚsumZ	ones_likerZ   ÚflattenZ	transposerj   Z
zeros_likeZcumsumrk   ÚcloneÚviewÚrepeatr   rC   ZtensorZint64r    ÚlongÚcatÚTÚstackrI   r   r   ÚtypeZint32r"   )r\   rS   ÚstepZtransrC   ÚsizesZhead_idZcol_idÚsegmentsZnnzZ
num_blocksÚoffsetsZB_idxZB_incsÚdivZA_idxÚcurrent_offsetÚzZlayoutwZmsumZA_incsri   r`   ZincsÚpadr/   r
   r
   r   Údsd_lutô   sd    

  
"$"$ 

 r‚   c
           
      C   s"   t || | | | |||||	d
S ©N)rU   )rm   )
r8   r9   rO   rP   rQ   rR   rS   r/   ri   rU   r
   r
   r   Ú
dds_matmulZ  s    r„   c                   @   s0   e Zd ZeeedœZedd„ ƒZedd„ ƒZ	dS )Ú_matmul©ÚsddÚdsdÚddsc                 C   sx   t j| ||||||||	|
|d
}|  ||¡ || _|| _|| _|| _|| _|| _|| _	|| _
|| _|| _|d k	| _|S rƒ   )r…   ÚfnZsave_for_backwardÚda_lutÚda_widthÚdb_lutÚdb_widthÚmoderR   rS   rO   rP   rQ   Úhas_out)Úctxr8   r9   rO   rP   rQ   r   rR   rS   Úc_lutÚc_widthr‹   rŒ   r   rŽ   rU   r:   r
   r
   r   Úforwardf  s    "
z_matmul.forwardc           
      C   sè   | j \}}d\}}| j}| jd rh|d |d  |d  }tj| ||| j| j | j| j| j	| j
| jƒ	}| jd r¸|d |d  |d  }tj| ||| j | j| j| j| j	| j| jƒ	}| jrÂ|nd }	||d d d d d d d d d d d d |	fS )N)NNr   r   r   )Zsaved_tensorsr   Zneeds_input_gradr…   rŠ   rQ   rP   rO   rR   rS   r‹   rŒ   r   rŽ   r   )
r‘   Zdcr8   r9   ÚdaÚdbr   Zmode_daZmode_dbZdoutr
   r
   r   Úbackward{  sT    

        ÿ
        ÿ
        þz_matmul.backwardN)
Ú__name__Ú
__module__Ú__qualname__rW   rm   r„   rŠ   Ústaticmethodr”   r—   r
   r
   r
   r   r…   b  s
   
r…   c                   @   s    e Zd Zddd„Zddd„ZdS )	ÚmatmulFc           	      C   sB  |dkrt dƒ‚|| _|| _|| _|| _|| _|| _|j| _t	|dƒ}| jdkr”t
|||ƒ\| _| _t|||d|ƒ\| _| _t|||d|ƒ\| _| _| jdkrèt|||| j |ƒ\| _| _t
|||ƒ\| _| _t|||| j|ƒ\| _| _| jdkr>t|||| j|ƒ\| _| _t|||| j |ƒ\| _| _t
|||ƒ\| _| _d S )	Nr†   z"Supported modes are: sdd, dsd, ddsrD   r‡   TFrˆ   r‰   )ÚNotImplementedErrorrS   r   rO   rP   rQ   r\   rJ   rR   rk   r]   r’   r“   r‚   r‹   rŒ   r   rŽ   )	Úselfr\   rS   r   rC   rO   rP   rQ   rz   r
   r
   r   Ú__init__•  s,    


zmatmul.__init__Nc                 C   sB   t  ||| j| j| j| j| j| j| j| j	| j
| j| j| j|¡}|S )N)r…   ÚapplyrO   rP   rQ   r   rR   rS   r’   r“   r‹   rŒ   r   rŽ   )rž   r8   r9   rU   r:   r
   r
   r   Ú__call__­  s$              ûzmatmul.__call__)FFF)N)r˜   r™   rš   rŸ   r¡   r
   r
   r
   r   rœ   “  s   
rœ   )N)N)N)rL   Ú r   r   r   r   r   Z	constexprr>   rW   r]   rd   rm   r‚   r„   ZautogradÚFunctionr…   rœ   r
   r
   r
   r   Ú<module>   s6    ÿ   ù@
"   ùK
&f
1