U
    9%e                  	   @  s(  d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZ ddlmZ ddlmZ d	d
lmZmZ edZdZdZdddddZddddZdd ZG dd dZG dd deZG dd deZG dd deZedZed Zed!Z ed"Z!ed#Z"ed$Z#ed%Z$ed&Z%ed'Z&ed(Z'ed)Z(ed*Z)ed+Z*ed,Z+ed-Z,ed.Z-ed/Z.ee"Z/G d0d1 d1Z0G d2d3 d3Z1d4d5 Z2edd7d8Z3edd9d:Z4edd;d<Z5d=d> Z6edd?d@Z7eddAdBZ8eddCdDZ9eddEdFZ:eddHdIZ;ed dJdKZ<eddLdMZ=dNdO Z>eddPdQZ?edRe-d6fdSdTZ@ed6d6eA dUdUdUdGd6fdVdWZBeddYdZZCedd3d[d\d]ZDedd3d[d^d_ZEd`dadbdcddZFeeFdeddfdgZGeeFdhddidjZHeeFdkddldmZIeeFdnd	dodpZJeeFdqd
drdsZKeeFdtddudvZLeeFdwddxdyZMeeFdzdd{d|ZNedd}d~ZOedddZPedddZQd`dadbddZReeRddddZSeeRddddZTeeRddddZUeeRddddZVeeRddddZWeeRddddZXdd`d`d`dadddZYedd ZZedddZ[edddZ\edddZ]edd Z^edd Z_edd Z`edd Zaedd Zbedd ZceeYdddddddnZdeeYddddddZeedd Zfedd Zgedd Zhedd ZieeYdddddddqZjeeYddddddZkeddĄ ZleeYdŃdddńZmeddȄ ZneeYdɃd dd˄Zod!d`d`d`daddd̈́Zped"ddτZqeepdЃd#ddЄZreddӄ Zseepdԃd$ddԄZted%ddׄZued&ddلZved'ddۄZwed(dd݄Zxeddd6dGd6dd`d`dddZyed)ddZzed6dddZ{ed*ddZ|G dd dZ}d+d`d`dddddddZ~d,d`d`ddddddZdd Zd6S (-      )annotations)contextmanager)Enum)wraps)CallableListSequenceTypeVar   )ir)jit   )mathsemanticTi   Z__triton_builtin__)fnreturnc                   s0   t  stt  fdd}t|td |S )zMark a function as a builtin.c                    s&   d|ks|d d krt d | |S )N_builderzdDid you forget to add @triton.jit ? (`_builder` argument must be provided outside of JIT functions.)
ValueError)argskwargsr    S/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/triton/language/core.pywrapper   s
    zbuiltin.<locals>.wrapperT)callableAssertionErrorr   setattrTRITON_BUILTIN)r   r   r   r   r   builtin   s
    r    boolr   c                 C  s   t | tdS )z-Is this a registered triton builtin function?F)getattrr   r   r   r   r   
is_builtin%   s    r$   c                 C  s  t | trt|| tS t | trd|   kr8dk rLn nt|| tS d|   kr`dk rtn nt|| tS d|   krdk rn nt|	| t
S d|   krdk rn nt|	| tS td|  dnt | tr^d	}d
dd  }td | }|tdks<|dks<| | ks<||  kr8|krLn nt|| tS t|| tS n(t | trvt| j|S t | tr| S dstd|  dt|  dd S )Ni   l        l        l         l            l            zNonrepresentable integer .g      8g   ?r
      absinfg        Fzcannot convert z	 of type z
 to tensor)
isinstancer!   tensorZget_int1int1intZ	get_int32int32uint32Z	get_int64int64uint64RuntimeErrorfloat__builtins__Zget_fp32float32Zget_fp64float64	constexpr
_to_tensorvaluer   type)xbuilderZmin_float32Zmax_float32Zabs_xr   r   r   r7   *   sB    

 
r7   c                   @  s  e Zd ZddddgZddddd	gZd
ddddddgZddddgZdgZG dd deZ	dd Z
dd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8d9 Zd:d; Zd<d= Zd>d? Zd@dA Z e!dBdC Z"e!dDdE Z#e!dFdG Z$d dHdIdJZ%d dHdKdLZ&dMdN Z'e(dOdP Z)dQdRdSdTdUZ*dVdW Z+e(dXdYdZd[Z,d\d] Z-d^S )_dtypeint8int16r-   r/   r+   uint8uint16r.   r0   fp8e4b15fp8e4fp8e5fp16bf16fp32fp64voidc                   @  s   e Zd ZdZdZdS )zdtype.SIGNEDNESSr   r   N)__name__
__module____qualname__SIGNEDUNSIGNEDr   r   r   r   
SIGNEDNESSS   s   rN   c                 C  s  || _ |tjtj tj tj ks*t||tjkr^tjj| _	t
|dd | _| j| _n*|tjkrtjj| _	t
|dd | _| j| _n|tjkrx|dkrd| _d| _d| _n|dkrd| _d| _d| _n|d	krd
| _d| _d| _n|dkrd| _d| _d| _nh|dkr,d| _d| _d| _nJ|dkrJd| _d| _d| _n,|dkrhd| _d| _d| _ntd| n|dkrd| _d S )Nr,   rA            rB      rC   r
   rD   
      rE   r&   rF          rG   5   @   i  z Unsupported floating-point type rH   r   )namer<   
SINT_TYPES
UINT_TYPESFP_TYPESOTHER_TYPESr   rN   rL   Zint_signednessr,   splitint_bitwidthprimitive_bitwidthrM   Zfp_mantissa_widthZexponent_biasr1   )selfrZ   r   r   r   __init__W   sT    $









zdtype.__init__c                 C  s
   d| j kS )NZfp8rZ   rb   r   r   r   is_fp8   s    zdtype.is_fp8c                 C  s
   | j dkS )NrB   rd   re   r   r   r   is_fp8e4   s    zdtype.is_fp8e4c                 C  s
   | j dkS )NrA   rd   re   r   r   r   is_fp8e4b15   s    zdtype.is_fp8e4b15c                 C  s
   | j dkS )NrD   rd   re   r   r   r   is_fp16   s    zdtype.is_fp16c                 C  s
   | j dkS )NrE   rd   re   r   r   r   is_bf16   s    zdtype.is_bf16c                 C  s
   | j dkS )NrF   rd   re   r   r   r   is_fp32   s    zdtype.is_fp32c                 C  s
   | j dkS )NrG   rd   re   r   r   r   is_fp64   s    zdtype.is_fp64c                 C  s
   | j dkS )Nr+   rd   re   r   r   r   is_int1   s    zdtype.is_int1c                 C  s
   | j dkS )Nr=   rd   re   r   r   r   is_int8   s    zdtype.is_int8c                 C  s
   | j dkS )Nr>   rd   re   r   r   r   is_int16   s    zdtype.is_int16c                 C  s
   | j dkS )Nr-   rd   re   r   r   r   is_int32   s    zdtype.is_int32c                 C  s
   | j dkS )Nr/   rd   re   r   r   r   is_int64   s    zdtype.is_int64c                 C  s
   | j dkS )Nr?   rd   re   r   r   r   is_uint8   s    zdtype.is_uint8c                 C  s
   | j dkS )Nr@   rd   re   r   r   r   	is_uint16   s    zdtype.is_uint16c                 C  s
   | j dkS )Nr.   rd   re   r   r   r   	is_uint32   s    zdtype.is_uint32c                 C  s
   | j dkS )Nr0   rd   re   r   r   r   	is_uint64   s    zdtype.is_uint64c                 C  s   | j tjkS N)rZ   r<   r]   re   r   r   r   is_floating   s    zdtype.is_floatingc                 C  s   | j tjkS rv   )rZ   r<   STANDARD_FP_TYPESre   r   r   r   is_standard_floating   s    zdtype.is_standard_floatingc                 C  s   | j tjkS rv   )rZ   r<   r[   re   r   r   r   is_int_signed   s    zdtype.is_int_signedc                 C  s   | j tjkS rv   )rZ   r<   r\   re   r   r   r   is_int_unsigned   s    zdtype.is_int_unsignedc                 C  s   | j tjtj kS rv   )rZ   r<   r[   r\   re   r   r   r   is_int   s    zdtype.is_intc                 C  s   |   S rv   )rm   re   r   r   r   is_bool   s    zdtype.is_boolc                   C  s   t dd S )NzNot implementedr1   r   r   r   r   is_void   s    zdtype.is_voidc                   C  s   dS NFr   r   r   r   r   is_block   s    zdtype.is_blockc                   C  s   dS r   r   r   r   r   r   is_ptr   s    zdtype.is_ptr)otherc                 C  s   t |tsdS | j|jkS r   )r)   r<   rZ   rb   r   r   r   r   __eq__   s    
zdtype.__eq__c                 C  s   |  | S rv   r   r   r   r   r   __ne__   s    zdtype.__ne__c                 C  s   t | jfS rv   )hashrZ   re   r   r   r   __hash__   s    zdtype.__hash__c                 C  s   | S rv   r   re   r   r   r   scalar   s    zdtype.scalar
ir.builderzir.typer;   r   c                 C  s   | j dkr| S | j dkr$| S | j dkr6| S | j dkrH| S | j dkrZ| S | j dkrl| S | j dkr~| S | j dkr| S | j d	kr|	 S | j d
kr|
 S | j dkr| S | j dkr| S | j dk r| S td|  dd S )NrH   r+   )r=   r?   )r>   r@   )r-   r.   )r/   r0   rC   rB   rA   rD   rE   rF   rG   zfail to convert z to ir type)rZ   Zget_void_tyZget_int1_tyZget_int8_tyZget_int16_tyZget_int32_tyZget_int64_tyZget_fp8e5_tyZget_fp8e4_tyZget_fp8e4b15_tyZget_half_tyZget_bf16_tyZget_float_tyZget_double_tyr   rb   r;   r   r   r   to_ir   s6    











zdtype.to_irc                 C  s   | j S rv   rd   re   r   r   r   __str__   s    zdtype.__str__strr"   c                 C  s   | j S )z"See cache_key_part() in triton.cc.rd   re   r   r   r   cache_key_part  s    zdtype.cache_key_partc                 C  s   d| j  S )Nztriton.language.rd   re   r   r   r   __repr__  s    zdtype.__repr__N).rI   rJ   rK   r[   r\   r]   rx   r^   r   rN   rc   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rw   ry   rz   r{   r|   r}   staticmethodr   r   r   r   r   r   propertyr   r   r   r   r   r   r   r   r   r<   L   sZ   -



r<   c                   @  sr   e Zd ZddddddZddd	d
dZdd Zdd Zdd Zd ddddZd ddddZ	e
dd ZdS )pointer_typer   r<   r,   )
element_tyaddress_spacec                 C  s,   t |tstd|| _|| _|  | _d S )Nz,element_ty is a {type(element_ty).__name__}.)r)   r<   	TypeErrorr   r   r   rZ   )rb   r   r   r   r   r   rc     s
    
zpointer_type.__init__r   zir.pointer_typer   c                 C  s   | | j|dS )Nr   )Z
get_ptr_tyr   r   r   r   r   r   r     s    zpointer_type.to_irc                 C  s   d| j  dS )Nzpointer<>r   re   r   r   r   r     s    zpointer_type.__str__c                 C  s   |   S rv   r   re   r   r   r   r     s    zpointer_type.__repr__c                 C  s   dS NTr   re   r   r   r   r     s    zpointer_type.is_ptrr!   r   r   c                 C  s&   t |tsdS | j|jko$| j|jkS r   )r)   r   r   r   r   r   r   r   r     s    
zpointer_type.__eq__c                 C  s   |  | S rv   r   r   r   r   r   r   $  s    zpointer_type.__ne__c                 C  s   | S rv   r   re   r   r   r   r   '  s    zpointer_type.scalarN)r   )rI   rJ   rK   rc   r   r   r   r   r   r   r   r   r   r   r   r   r   
  s   r   c                   @  s~   e Zd ZdddddZdddd	d
Zdd Zdd Zdd ZddddZd ddddZ	d ddddZ
edd ZdS )
block_typer<   r   )r   shapec                 C  s   || _ |stdt|d tr.dd |D }|| _d| _| jD ]}|  j|9  _q@| jtkrvtd| j dt d|  | _	d S )	Nz0d block_type is forbiddenr   c                 S  s   g | ]
}|j qS r   r8   .0sr   r   r   
<listcomp>7  s     z'block_type.__init__.<locals>.<listcomp>r   znumel (z') exceeds triton maximum tensor numel ())
r   r   r)   r6   r   numelTRITON_MAX_TENSOR_NUMELr   r   rZ   )rb   r   r   r   r   r   r   rc   -  s    

zblock_type.__init__r   zir.block_typer   c                 C  s   | | j|| jS rv   )Zget_block_tyr   r   r   r   r   r   r   r   B  s    zblock_type.to_irc                 C  s   d| j  d| j dS )N<z, r   )r   r   re   r   r   r   r   E  s    zblock_type.__str__c                 C  s   |   S rv   r   re   r   r   r   r   H  s    zblock_type.__repr__c                 C  s   dS r   r   re   r   r   r   r   K  s    zblock_type.is_blockz	List[int]r"   c                 C  s   | j S rv   )r   re   r   r   r   get_block_shapesN  s    zblock_type.get_block_shapesr!   r   c                 C  s&   t |tsdS | j|jko$| j|jkS r   )r)   r   r   r   r   r   r   r   r   Q  s    
zblock_type.__eq__c                 C  s   |  | S rv   r   r   r   r   r   r   V  s    zblock_type.__ne__c                 C  s   | j S rv   r   re   r   r   r   r   Y  s    zblock_type.scalarN)rI   rJ   rK   rc   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ,  s   r   c                   @  s4   e Zd ZddddddZdd Zdd	d
dZdS )function_typezList[dtype]None)	ret_typesparam_typesr   c                 C  s   || _ || _d S rv   )r   r   )rb   r   r   r   r   r   rc   _  s    zfunction_type.__init__c                 C  s   d| j  d| j S )Nzfn (z) -> )r   r   re   r   r   r   r   c  s    zfunction_type.__str__r   r;   c                   s4    fdd| j D } fdd| jD } ||S )Nc                   s   g | ]}|  qS r   r   r   tyr   r   r   r   g  s     z'function_type.to_ir.<locals>.<listcomp>c                   s   g | ]}|  qS r   r   )r   ret_typer   r   r   r   h  s     )r   r   Zget_function_ty)rb   r;   Zir_param_typesr   r   r   r   r   f  s    zfunction_type.to_irN)rI   rJ   rK   rc   r   r   r   r   r   r   r   ^  s   r   rH   r+   r=   r>   r-   r/   r?   r@   r.   r0   rC   rB   rA   rD   rE   rF   rG   c                   @  sF  e Zd ZdZdd ZddddZdd	 Zd
d Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8d9 Zd:d; Zd<d= Z d>d? Z!d@dA Z"dBdC Z#dDdE Z$dFdG Z%dHdI Z&dJdK Z'dLdM Z(dNdO Z)dPS )Qr6   zL
    This class is used to store a value that is known at compile-time.
    c                 C  s   t |tr|j| _n|| _d S rv   r)   r6   r8   )rb   r8   r   r   r   rc     s    

zconstexpr.__init__r   r"   c                 C  s   d| j  dS )Nz
constexpr[]r   re   r   r   r   r     s    zconstexpr.__repr__c                 C  s   | j S rv   r   re   r   r   r   	__index__  s    zconstexpr.__index__c                 C  s   t | j|j S rv   r6   r8   r   r   r   r   __add__  s    zconstexpr.__add__c                 C  s   t |j| j S rv   r   r   r   r   r   __radd__  s    zconstexpr.__radd__c                 C  s   t | j|j S rv   r   r   r   r   r   __sub__  s    zconstexpr.__sub__c                 C  s   t |j| j S rv   r   r   r   r   r   __rsub__  s    zconstexpr.__rsub__c                 C  s   t | j|j S rv   r   r   r   r   r   __mul__  s    zconstexpr.__mul__c                 C  s   t | j|j S rv   r   r   r   r   r   __mod__  s    zconstexpr.__mod__c                 C  s   t |j| j S rv   r   r   r   r   r   __rmul__  s    zconstexpr.__rmul__c                 C  s   t | j|j S rv   r   r   r   r   r   __truediv__  s    zconstexpr.__truediv__c                 C  s   t |j| j S rv   r   r   r   r   r   __rtruediv__  s    zconstexpr.__rtruediv__c                 C  s   t | j|j S rv   r   r   r   r   r   __floordiv__  s    zconstexpr.__floordiv__c                 C  s   t |j| j S rv   r   r   r   r   r   __rfloordiv__  s    zconstexpr.__rfloordiv__c                 C  s   t | j|jkS rv   r   r   r   r   r   __gt__  s    zconstexpr.__gt__c                 C  s   t |j| jkS rv   r   r   r   r   r   __rgt__  s    zconstexpr.__rgt__c                 C  s   t | j|jkS rv   r   r   r   r   r   __ge__  s    zconstexpr.__ge__c                 C  s   t |j| jkS rv   r   r   r   r   r   __rge__  s    zconstexpr.__rge__c                 C  s   t | j|jk S rv   r   r   r   r   r   __lt__  s    zconstexpr.__lt__c                 C  s   t |j| jk S rv   r   r   r   r   r   __rlt__  s    zconstexpr.__rlt__c                 C  s   t | j|jkS rv   r   r   r   r   r   __le__  s    zconstexpr.__le__c                 C  s   t |j| jkS rv   r   r   r   r   r   __rle__  s    zconstexpr.__rle__c                 C  s   t | j|jkS rv   r   r   r   r   r   r     s    zconstexpr.__eq__c                 C  s   t | j|jkS rv   r   r   r   r   r   r     s    zconstexpr.__ne__c                 C  s
   t | jS rv   )r!   r8   re   r   r   r   __bool__  s    zconstexpr.__bool__c                 C  s   t | j S rv   r   re   r   r   r   __neg__  s    zconstexpr.__neg__c                 C  s   t | j|j@ S rv   r   r   r   r   r   __and__  s    zconstexpr.__and__c                 C  s   t | jo|jS rv   r   r   r   r   r   logical_and  s    zconstexpr.logical_andc                 C  s   t | j|jB S rv   r   r   r   r   r   __or__  s    zconstexpr.__or__c                 C  s   t | j|jA S rv   r   r   r   r   r   __xor__  s    zconstexpr.__xor__c                 C  s   t | jp|jS rv   r   r   r   r   r   
logical_or  s    zconstexpr.logical_orc                 C  s   t | j
 S rv   r   re   r   r   r   __pos__  s    zconstexpr.__pos__c                 C  s   t | j S rv   r   re   r   r   r   
__invert__  s    zconstexpr.__invert__c                 C  s   t | j|j S rv   r   r   r   r   r   __pow__  s    zconstexpr.__pow__c                 C  s   t | j|j? S rv   r   r   r   r   r   
__rshift__  s    zconstexpr.__rshift__c                 C  s   t | j|j> S rv   r   r   r   r   r   
__lshift__  s    zconstexpr.__lshift__c                 C  s   t | j S rv   r   re   r   r   r   __not__  s    zconstexpr.__not__c                 O  s   | j ||S rv   r   )rb   r   kwdsr   r   r   __call__  s    zconstexpr.__call__N)*rI   rJ   rK   __doc__rc   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r6     sN   r6   c                   @  sF  e Zd ZddddZddddZed[d
dZd\ddZed]ddZd^ddZ	ed_ddZ
d`ddZedaddZdbddZedcddZeddddZededdZedfd d!Zedgd"d#Zedhd$d%Zedid&d'Zedjd(d)Zedkd*d+Zedld,d-Zedmd.d/Zednd0d1Zedod2d3Zedpd4d5Zedqd6d7Zedrd8d9Zedsd:d;Zedtd<d=Zedud>d?Z edvd@dAZ!edwdBdCZ"edxdDdEZ#edydFdGZ$edzdHdIZ%ed{dJdKZ&ed|dLdMZ'ed}dNdOZ(ed~dPdQZ)eddRdSZ*eddTdUZ+e,dVdW Z-eddYdZZ.d	S )r*   r<   )r9   c                 C  sl   || _ d| _| r|j| _d| _| jD ]}|  j|9  _q(t| j| _|| _|j| _dd | jD | _d S )N)r   r   c                 S  s   g | ]}t |qS r   r6   r   r   r   r   r     s     z#tensor.__init__.<locals>.<listcomp>)handler   r   r   r6   r9   r   r<   )rb   r   r9   r   r   r   r   rc     s    
ztensor.__init__r   r"   c                 C  s(   t | jd ddd | jD  d S )N[,c                 s  s   | ]}t |V  qd S rv   )r   r   r   r   r   	<genexpr>  s     z!tensor.__str__.<locals>.<genexpr>r   )r   r<   joinr   re   r   r   r   r     s    ztensor.__str__Nc                 C  s   t ||}t| ||S rv   )r7   r   addrb   r   r   r   r   r   r     s    
ztensor.__add__c                 C  s   | j ||dS Nr   )r   r   r   r   r   r     s    ztensor.__radd__c                 C  s   t ||}t| ||S rv   r7   r   subr   r   r   r   r     s    
ztensor.__sub__c                 C  s   t ||}t|| |S rv   r   r   r   r   r   r   #  s    
ztensor.__rsub__c                 C  s   t ||}t| ||S rv   )r7   r   mulr   r   r   r   r   '  s    
ztensor.__mul__c                 C  s   | j ||dS r   )r   r   r   r   r   r   ,  s    ztensor.__rmul__c                 C  s   t ||}t| ||S rv   r7   r   truedivr   r   r   r   r   /  s    
ztensor.__truediv__c                 C  s   t ||}t|| |S rv   r   r   r   r   r   r   4  s    
ztensor.__rtruediv__c                 C  s   t ||}t| ||S rv   r7   r   floordivr   r   r   r   r   8  s    
ztensor.__floordiv__c                 C  s   t ||}t|| |S rv   r   r   r   r   r   r   =  s    
ztensor.__rfloordiv__c                 C  s   t ||}t| ||S rv   r7   r   modr   r   r   r   r   B  s    
ztensor.__mod__c                 C  s   t ||}t|| |S rv   r   r   r   r   r   __rmod__G  s    
ztensor.__rmod__c                 C  s   t | |S rv   )r   minusrb   r   r   r   r   r   M  s    ztensor.__neg__c                 C  s   t | |S rv   )r   invertr   r   r   r   r   Q  s    ztensor.__invert__c                 C  s   t ||}t| ||S rv   r7   r   and_r   r   r   r   r   W  s    
ztensor.__and__c                 C  s   t ||}t|| |S rv   r   r   r   r   r   __rand__\  s    
ztensor.__rand__c                 C  s   t ||}t| ||S rv   r7   r   or_r   r   r   r   r   a  s    
ztensor.__or__c                 C  s   t ||}t|| |S rv   r   r   r   r   r   __ror__f  s    
ztensor.__ror__c                 C  s   t ||}t| ||S rv   r7   r   Zxor_r   r   r   r   r   k  s    
ztensor.__xor__c                 C  s   t ||}t|| |S rv   r   r   r   r   r   __rxor__p  s    
ztensor.__rxor__c                 C  s   t ||}t| ||S rv   r7   r   Zshlr   r   r   r   r   u  s    
ztensor.__lshift__c                 C  s   t ||}t|| |S rv   r   r   r   r   r   __rlshift__z  s    
ztensor.__rlshift__c                 C  s4   t ||}| j r"t| ||S t| ||S d S rv   r7   r<   rz   r   ZashrZlshrr   r   r   r   r     s    

ztensor.__rshift__c                 C  s4   t ||}| j r"t|| |S t|| |S d S rv   r   r   r   r   r   __rrshift__  s    

ztensor.__rrshift__c                 C  s   t ||}t| ||S rv   r7   r   greater_thanr   r   r   r   r     s    
ztensor.__gt__c                 C  s   t ||}t|| |S rv   r   r   r   r   r   r     s    
ztensor.__rgt__c                 C  s   t ||}t| ||S rv   r7   r   Zgreater_equalr   r   r   r   r     s    
ztensor.__ge__c                 C  s   t ||}t|| |S rv   r   r   r   r   r   r     s    
ztensor.__rge__c                 C  s   t ||}t| ||S rv   r7   r   	less_thanr   r   r   r   r     s    
ztensor.__lt__c                 C  s   t ||}t|| |S rv   r   r   r   r   r   r     s    
ztensor.__rlt__c                 C  s   t ||}t| ||S rv   r7   r   Z
less_equalr   r   r   r   r     s    
ztensor.__le__c                 C  s   t ||}t|| |S rv   r  r   r   r   r   r     s    
ztensor.__rle__c                 C  s   t ||}t| ||S rv   )r7   r   equalr   r   r   r   r     s    
ztensor.__eq__c                 C  s   t ||}t| ||S rv   )r7   r   	not_equalr   r   r   r   r     s    
ztensor.__ne__c                 C  s   t ||}t| ||S rv   )r7   r   r   r   r   r   r   r     s    
ztensor.logical_andc                 C  s   t ||}t| ||S rv   )r7   r   r   r   r   r   r   r     s    
ztensor.logical_orc                 C  s   t | |S rv   )r   not_r   r   r   r   r     s    ztensor.__not__c                 C  s   t |tr|g}| }t|D ]h\}}t |trH|jd krHt|||}qt |trr|jd krr|jd krr|j	d krrqdst
d| q|S )NFzunsupported tensor index: )r)   slice	enumerater6   r8   r   expand_dimsstartstopstepr   )rb   Zslicesr   retdimslr   r   r   __getitem__  s    
(ztensor.__getitem__c                 C  s   dst dd S )NFz0Transposition must be created by the AST Visitor)r   re   r   r   r   r     s    ztensor.TFc                 C  s0   t |tr|j}|r"t| ||S t| ||S rv   )r)   r6   r8   r   bitcastcast)rb   r<   r  r   r   r   r   to  s
    
z	tensor.to)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)N)FN)/rI   rJ   rK   rc   r   r    r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r  r   r   r   r   r*     s   





r*   c                 C  s   t | tr| jS | S rv   r   )vr   r   r   _constexpr_to_value  s    
r  Nc                 C  s   t | } t| |S )z
    Returns the id of the current program instance along the given :code:`axis`.

    :param axis: The axis of the 3D launch grid. Has to be either 0, 1 or 2.
    :type axis: int
    )r  r   
program_idaxisr   r   r   r   r    s    r  c                 C  s   t | } t| |S )z
    Returns the number of program instances launched along the given :code:`axis`.

    :param axis: The axis of the 3D launch grid. Has to be either 0, 1 or 2.
    :type axis: int
    )r  r   num_programsr  r   r   r   r    s    r  c                 C  s   t | } t |}t| ||S )ar  
    Returns contiguous values within the left-closed and right-open interval [:code:`start`, :code:`end`).     End - Start must be less than or equal to TRITON_MAX_TENSOR_NUMEL = 131072

    :param start: Start of the interval. Must be a power of two.
    :type start: int32
    :param end: End of the interval. Must be a power of two > start.
    :type end: int32
    )r  r   arange)r	  endr   r   r   r   r     s    r  c                 C  sj   t | } t| D ]J\}}t|ts2td| dt|jtstd| dt|j dqdd | D S )NzShape element  must have type `constexpr`1 must have type `constexpr[int]`, got `constexpr[r   c                 S  s   g | ]}t |qS r   )r  r   r:   r   r   r   r   7  s     z%_shape_check_impl.<locals>.<listcomp>)r  r  r)   r6   r   r8   r,   r9   )r   idr   r   r   _shape_check_impl0  s    
r   c                 C  s(   t | } t|}t|}t| |||S )a]  
    Returns a tensor filled with the scalar value for the given :code:`shape` and :code:`dtype`.

    :param shape: Shape of the new array, e.g., (8, 16) or (8, )
    :value value: A scalar value to fill the array with
    :type shape: tuple of ints
    :param dtype: Data-type of the new array, e.g., :code:`tl.float16`
    :type dtype: DType
    )r   r  r   full)r   r8   r<   r   r   r   r   r!  :  s    r!  c                 C  s   t | ||S )z
    Tries to broadcast the two given blocks to a common compatible shape.

    :param input: The first input tensor.
    :type input: Block
    :param other: The second input tensor.
    :type other: Block
    )r   Zbroadcast_impl_value)inputr   r   r   r   r   	broadcastP  s    
r#  c                 C  s   t |}t| ||S )z
    Tries to broadcast the given tensor to a new :code:`shape`.

    :param input: The input tensor.
    :type input: Block
    :param shape: The desired shape.
    :type shape: Tuple[int]
    )r   r   Zbroadcast_impl_shaper"  r   r   r   r   r   broadcast_to]  s    
r%  c                 C  s   t | |S rv   )r   trans)r"  r   r   r   r   r&  k  s    r&  Fc                 C  s   t | |||S )aq  
    Concatenate the given blocks

    :param input: The first input tensor.
    :type input:
    :param other: The second input tensor.
    :type other:
    :param reorder: Compiler hint. If true, the compiler is
    allowed to reorder elements while concatenating inputs.
    Only use if the order does not matter (e.g., result is
    only used in reduction ops)
    )r   cat)r"  r   Zcan_reorderr   r   r   r   r'  p  s    r'  c                 C  s   t |}t| ||S )z
    Returns a tensor with the same elements as `input` but a different shape.
    The order of the elements may not be preserved.

    :param input: The input tensor.
    :type input:
    :param shape: The desired shape.
    :type shape: Tuple[int]

    )r   r   viewr$  r   r   r   r(    s    r(  c                 C  s   t |}t| ||S rv   )r   r   reshaper$  r   r   r   r)    s    r)  c                 C  sH   | |   kr|k s4n t d|  d|  d| | dkr@| S | | S )Nzinvalid axis z. Expected z <= axis < r   r   )r  ndimr   r   r   
_wrap_axis  s    r+  c                   s   t |}t|trt|n|g}t| jt|   fdd|D }tt|t|krftd| | }t|D ]}t	
|||}qr|S )aR  
    Expand the shape of a tensor, by inserting new length-1 dimensions.

    Axis indices are with respect to the resulting tensor, so
    ``result.shape[axis]`` will be 1 for each axis.

    :param input: The input tensor.
    :type input: tl.tensor
    :param axis: The indices to add new axes
    :type axis: int | Sequence[int]

    c                   s   g | ]}t t| qS r   )r+  r  r   r  Znew_ndimr   r   r     s     zexpand_dims.<locals>.<listcomp>z7expand_dims recieved duplicate axes, normalized axes = )r  r)   r   listlenr   setr   sortedr   r  )r"  r  r   Zaxesr  ar   r-  r   r    s    r  Tc                 C  s"   t |}t |}t| ||||S )a  
    Returns the matrix product of two blocks.

    The two blocks must be two-dimensional and have compatible inner dimensions.

    :param input: The first tensor to be multiplied.
    :type input: 2D tensor of scalar-type in {:code:`float16`, :code:`bfloat16`, :code:`float32`}
    :param other: The second tensor to be multiplied.
    :type other: 2D tensor of scalar-type in {:code:`float16`, :code:`bfloat16`, :code:`float32`}
    )r  r   dot)r"  r   Z
allow_tf32Z	out_dtyper   r   r   r   r3    s    r3   c	           	      C  sf   t |dk	rt||}t |dk	r,t||}t |}t |}t |}t |}t| ||||||||	S )al  
    Return a tensor of data whose values are loaded from memory at location defined by `pointer`:
        (1) `pointer` could be a single element pointer, then a scalar will be loaded
            - `mask` and `other` must be scalar too
            - `other` is implicitly typecast to `pointer.dtype.element_ty`
            - `boundary_check` and `padding_option` must be empty
        (2) `pointer` could be element-wise tensor of pointers, in which case:
            - `mask` and `other` are implicitly broadcast to `pointer.shape`
            - `other` is implicitly typecast to `pointer.dtype.element_ty`
            - `boundary_check` and `padding_option` must be empty
        (3) `pointer` could be a block pointer defined by `make_block_ptr`, in which case:
            - `mask` and `other` must be None
            - `boundary_check` and `padding_option` can be specified to control the behavior of out-of-bound access

    :param pointer: Pointer to the data to be loaded
    :type pointer: `triton.PointerType`, or block of `dtype=triton.PointerType`
    :param mask: if `mask[idx]` is false, do not load the data at address `pointer[idx]`
        (must be `None` with block pointers)
    :type mask: Block of `triton.int1`, optional
    :param other: if `mask[idx]` is false, return `other[idx]`
    :type other: Block, optional
    :param boundary_check: tuple of integers, indicating the dimensions which should do the boundary check
    :type boundary_check: tuple of ints, optional
    :param padding_option: should be one of {"", "zero", "nan"}, do padding while out of bound
    :param cache_modifier: changes cache option in NVIDIA PTX
    :type cache_modifier: str, optional
    :param eviction_policy: changes eviction policy in NVIDIA PTX
    :type eviction_policy: str, optional
    :param volatile: changes volatile option in NVIDIA PTX
    :type volatile: bool, optional
    N)r  r7   r   load)	pointermaskr   boundary_checkZpadding_optioncache_modifiereviction_policyZvolatiler   r   r   r   r5    s    #

 r5  r   c              	   C  sF   t ||}t|dk	r t ||}t|}t|}t| ||||||S )a  
    Store a tensor of data into memory locations defined by `pointer`:
        (1) `pointer` could be a single element pointer, then a scalar will be stored
            - `mask` must be scalar too
            - `boundary_check` and `padding_option` must be empty
        (2) `pointer` could be element-wise tensor of pointers, in which case:
            - `mask` is implicitly broadcast to `pointer.shape`
            - `boundary_check` must be empty
        (3) or `pointer` could be a block pointer defined by `make_block_ptr`, in which case:
            - `mask` must be None
            - `boundary_check` can be specified to control the behavior of out-of-bound access
    `value` is implicitly broadcast to `pointer.shape` and typecast to `pointer.dtype.element_ty`.

    :param pointer: The memory location where the elements of `value` are stored
    :type pointer: `triton.PointerType`, or block of `dtype=triton.PointerType`
    :param value: The tensor of elements to be stored
    :type value: Block
    :param mask: If `mask[idx]` is false, do not store `value[idx]` at `pointer[idx]`
    :type mask: Block of triton.int1, optional
    :param boundary_check: tuple of integers, indicating the dimensions which should do the boundary check
    :type boundary_check: tuple of ints, optional
    :param cache_modifier: changes cache option in NVIDIA PTX
    :type cache_modifier: str, optional
    :param eviction_policy: changes eviction policy in NVIDIA PTX
    :type eviction_policy: str, optional
    N)r7   r  r   store)r6  r8   r7  r8  r9  r:  r   r   r   r   r;    s    

r;  )basec              	   C  s   t | ||||||S )ak  
    Returns a pointer to a block in a parent tensor

    :param base: The base pointer to the parent tensor
    :param shape: The shape of the parent tensor
    :param strides: The strides of the parent tensor
    :param offsets: The offsets to the block
    :param block_shape: The shape of the block
    :param order: The order of the original data format
    )r   make_block_ptr)r<  r   stridesoffsetsZblock_shapeorderr   r   r   r   r=  (  s    r=  c                 C  s   t | ||S )z
    Advance a block pointer

    :param base: the block pointer to advance
    :param offsets: the offsets to advance, a tuple by dimension
    )r   advance)r<  r?  r   r   r   r   rA  7  s    rA  r   zCallable[[T], T])rZ   r   c                   s   ddd fdd}|S )Nr   funcr   c                   s   d}|j  d| _| S )Na+  
    Performs an atomic {name} at the memory location specified by :code:`pointer`.

    Return the data stored at :code:`pointer` before the atomic operation.

    :param pointer: The memory locations to compare-and-swap.
    :type pointer: Block of dtype=triton.PointerDType
    :param cmp: The values expected to be found in the atomic object
    :type cmp: Block of dtype=`pointer.dtype.element_ty`
    :param val: The values to copy in case the expected value matches the contained value.
    :type val: Block of dtype=`pointer.dtype.element_ty`
    rd   formatr   rC  Zdocstrrd   r   r   
_decoratorH  s    z&_add_atomic_docstr.<locals>._decoratorr   rZ   rG  r   rd   r   _add_atomic_docstrF  s    rI  zcompare-and-swapc                 C  s.   t ||}t ||}t|}t| ||||S rv   )r7   r  r   
atomic_cas)r6  cmpvalsemr   r   r   r   rJ  [  s    

rJ  Zexchangec                 C  s$   t ||}t|}t| ||||S rv   )r7   r  r   atomic_xchgr6  rL  r7  rM  r   r   r   r   rN  d  s    
rN  r   c                 C  s$   t ||}t|}t| ||||S rv   )r7   r  r   
atomic_addrO  r   r   r   rP  l  s    
rP  maxc                 C  s$   t ||}t|}t| ||||S rv   )r7   r  r   
atomic_maxrO  r   r   r   rR  t  s    
rR  minc                 C  s$   t ||}t|}t| ||||S rv   )r7   r  r   
atomic_minrO  r   r   r   rT  |  s    
rT  zlogical andc                 C  s$   t ||}t|}t| ||||S rv   )r7   r  r   
atomic_andrO  r   r   r   rU    s    
rU  z
logical orc                 C  s$   t ||}t|}t| ||||S rv   )r7   r  r   	atomic_orrO  r   r   r   rV    s    
rV  zlogical xorc                 C  s$   t ||}t|}t| ||||S rv   )r7   r  r   
atomic_xorrO  r   r   r   rW    s    
rW  c                 C  s.   t | |} t ||}t ||}t| |||S )a  
    Returns a tensor of elements from either :code:`x` or :code:`y`, depending on :code:`condition`.

    Note that :code:`x` and :code:`y` are always evaluated regardless of the value of :code:`condition`.

    If you want to avoid unintended memory operations, use the :code:`mask` arguments in `triton.load` and `triton.store` instead.

    The shape of :code:`x` and :code:`y` are both broadcast to the shape of :code:`condition`.
    :code:`x` and :code:`y` must have the same data type.

    :param condition: When True (nonzero), yield x, otherwise yield y.
    :type condition: Block of triton.bool
    :param x: values selected at indices where condition is True.
    :param y: values selected at indices where condition is False.
    )r7   r   where)	conditionr:   yr   r   r   r   rX    s    


rX  c                 C  s"   t | |} t ||}t| ||S rv   )r7   r   umulhi)r:   rZ  r   r   r   r   r[    s    

r[  c                 C  s   t |}t| |||S rv   )r  r   fdiv)r:   rZ  Zieee_roundingr   r   r   r   r\    s    r\  c                   s   ddd fdd}|S )Nr   rB  c                   s   d}|j  d| _| S )Nzk
    Computes the element-wise {name} of :code:`x`.

    :param x: the input values
    :type x: Block
    rd   rD  rF  rd   r   r   rG    s    z)_add_math_1arg_docstr.<locals>._decoratorr   rH  r   rd   r   _add_math_1arg_docstr  s    
r]  Zexponentialc                 C  s   t | |S rv   )r   expr:   r   r   r   r   r^    s    r^  znatural logarithmc                 C  s   t | |S rv   )r   logr_  r   r   r   r`    s    r`  Zcosinec                 C  s   t | |S rv   )r   cosr_  r   r   r   ra    s    ra  Zsinec                 C  s   t | |S rv   )r   sinr_  r   r   r   rb    s    rb  zsquare rootc                 C  s   t | |S rv   )r   sqrtr_  r   r   r   rc    s    rc  zabsolute valuec                 C  s   t | |S rv   )r   r'   r_  r   r   r   r'     s    r'   )rZ   return_indices_argtie_break_argr   c                   s   ddd fdd}|S )Nr   rB  c                   sL   d}d k	r"|d d  d7 }d k	r:|d d7 }|j  d| _| S )Nz
    Returns the {name} of all elements in the :code:`input` tensor along the provided :code:`axis`

    :param input: the input values
    :param axis: the dimension along which the reduction should be donez
    :param z-: if true, return index corresponding to the z valuezR: if true, return the left-most indices in case of ties for values that aren't NaNrd   rD  rF  rZ   rd  re  r   r   rG    s    

z)_add_reduction_docstr.<locals>._decoratorr   rZ   rd  re  rG  r   rf  r   _add_reduction_docstr  s    rh  c                 c  s   |   }d V  | | d S rv   )Zget_insertion_pointZrestore_insertion_point)r;   ipr   r   r   _insertion_guard  s    rj  c                   sT   t tr"tf| dd S  fdd}|dk	rDt|}t|| S )aR  Applies the combine_fn to all elements in :code:`input` tensors along the provided :code:`axis`

    :param input: the input tensor, or tuple of tensors
    :param axis: the dimension along which the reduction should be done
    :param combine_fn: a function to combine two groups of scalar tensors (must be marked with @triton.jit)

    r   
_generatorr   c              	     s   dd D }t ||d }| d}tz fdd|jD }||  fddt|jD }j|i d}t|tr|j	g}ndd |D }j
|  W 5 Q R X d S )	Nc                 S  s   g | ]}|j jqS r   r9   r   r   tr   r   r   r   )  s     z7reduce.<locals>.make_combine_region.<locals>.<listcomp>r
   r   c                   s   g | ]}|  qS r   r   r   r   r   r   r   .  s     c                   s    g | ]\}}t  ||qS r   r*   argr   r  r   blockr   r   r   0  s   r   c                 S  s   g | ]
}|j qS r   r   r   rr   r   r   r   6  s     )r   
get_regionrj  r   create_block_with_parentr  call_JitFunctionr)   r*   r   Zcreate_reduce_ret)Z	reduce_opin_scalar_tys	prototyperegionr   r   resultshandlesr   rl  
combine_fnr"  rs  r   make_combine_region(  s    




z#reduce.<locals>.make_combine_regionN)r)   r*   reducer  r   Z	reductionr"  r  r  r   rl  r  r   r  r   r    s    	

 r  c                 C  sB   | j j}| r(|jdk r(| jt|dS |tkr>| jt|dS | S )NrW   r   )r9   r   r|   r`   r  r-   bfloat16r4   )ro  r   	scalar_tyr   r   r   _promote_reduction_input=  s    r  c           
      C  s   t |}| j| }td||d}t| jdkrjdd tt| jD }||= t|||d}t|| j|d}t| |f||||d\}}	||	fS )Nr   r   r   c                 S  s   g | ]}t |qS r   r   r,  r   r   r   r   U  s     z(_reduce_with_indices.<locals>.<listcomp>rk  )r  r   r  r/  ranger  r%  r  )
r"  r  r  r   rl  nindexZaxes_to_expandZrvalueZrindicesr   r   r   _reduce_with_indicesM  s    
 
r  c                 C  s   t | |k | |S )z
    Computes the element-wise minimum of :code:`x` and :code:`y`.

    :param input: the first input tensor
    :type input: Block
    :param other: the second input tensor
    :type other: Block
    rX  r:   rZ  r   r   r   minimum_  s    
r  c                 C  s   t | |k| |S )z
    Computes the element-wise maximum of :code:`x` and :code:`y`.

    :param input: the first input tensor
    :type input: Block
    :param other: the second input tensor
    :type other: Block
    r  r  r   r   r   maximuml  s    
r  c           	      C  sF   |r| |ko||k }nd}| |kp$|}t || |}t |||}||fS r   r  )	value1index1value2index2tie_break_lefttiegtZv_retZi_retr   r   r   _argmax_combine{  s    r  c                 C  s   t | |||dS r   r  r  r  r  r  r   r   r   _argmax_combine_tie_break_left  s    r  c                 C  s   t | |||dS r   r  r  r   r   r   _argmax_combine_tie_break_fast  s    r  c                 C  s   t | |S rv   )r   rQ  r  r   r   r   	_fast_max  s    r  return_indicesreturn_indices_tie_break_left)rd  re  c                 C  s|   t | } |r*|rt| |tS t| |tS nNt| jjdk rlt| j rT| t	} n| j
 sbt| t} t| |tS d S NrW   )r  r  r  r  r6   r<   ra   rw   r  r4   is_integer_typer   r-   r  r  r"  r  r  r  r   r   r   rQ    s    
zmaximum indexr  )re  c                 C  s   t | |d|d\}}|S NT)r  r  )rQ  r"  r  r  _r  r   r   r   argmax  s    r  c           	      C  sF   |r| |ko||k }nd}| |k p$|}t || |}t |||}||fS r   r  )	r  r  r  r  r  r  ltZ	value_retZ	index_retr   r   r   _argmin_combine  s    r  c                 C  s   t | |||dS r   r  r  r   r   r   _argmin_combine_tie_break_left  s    r  c                 C  s   t | |||dS r   r  r  r   r   r   _argmin_combine_tie_break_fast  s    r  c                 C  s   t | |S rv   )r   rS  r  r   r   r   	_fast_min  s    r  c                 C  s|   t | } |r*|rt| |tS t| |tS nNt| jjdk rlt| j rT| t	} n| j
 sbt| t} t| |tS d S r  )r  r  r  r  r6   r<   ra   rw   r  r4   r  r   r-   r  r  r  r   r   r   rS    s    
zminimum indexc                 C  s   t | |d|d\}}|S r  )rS  r  r   r   r   argmin  s    r  c                 C  s   | | S rv   r   r2  br   r   r   _sum_combine  s    r  sumc                 C  s   t | } t| |tS rv   )r  r  r  r"  r  r   r   r   r    s    c                 C  s   | |A S rv   r   r  r   r   r   _xor_combine  s    r  zxor sumc                 C  s6   | j j}| stdt| |d} t| |t||dS )Nz#xor_sum only supported for integersr   rk  )r9   r   r|   r   r  r  r  )r"  r  r   rl  r  r   r   r   xor_sum  s     r  c                   s   ddd fdd}|S )Nr   rB  c                   s   d}|j  d| _| S )Nz
    Returns the {name} of all elements in the :code:`input` tensor along the provided :code:`axis`

    :param input: the input values
    :param axis: the dimension along which the scan should be donerd   rD  rF  rd   r   r   rG    s    z$_add_scan_docstr.<locals>._decoratorr   rg  r   rd   r   _add_scan_docstr  s    	r  c                   sL   t tr"tf| dd S  fdd}t|}t|| S )au  Applies the combine_fn to each elements with a carry in :code:`input` tensors along the provided :code:`axis` and update the carry

    :param input: the input tensor, or tuple of tensors
    :param axis: the dimension along which the reduction should be done
    :param combine_fn: a function to combine two groups of scalar tensors (must be marked with @triton.jit)

    rk  r   c              	     s   dd D }t ||d }| d}tz fdd|jD }||  fddt|jD }j|i d}t|tr|j	g}ndd |D }j
|  W 5 Q R X d S )	Nc                 S  s   g | ]}|j jqS r   rm  rn  r   r   r   r   -  s     zAassociative_scan.<locals>.make_combine_region.<locals>.<listcomp>r
   r   c                   s   g | ]}|  qS r   r   r   r   r   r   r   2  s     c                   s    g | ]\}}t  ||qS r   rp  rr  rs  r   r   r   4  s   ru  c                 S  s   g | ]
}|j qS r   rv  rw  r   r   r   r   :  s     )r   ry  rj  r   rz  r  r{  r)   r*   r   Zcreate_scan_ret)Zscan_opr|  r}  r~  r   r   r  r  r  rs  r   r  ,  s    




z-associative_scan.<locals>.make_combine_region)r)   r*   associative_scanr  r   r  r   r  r   r    s    	

 r  cumsumc                 C  s   t | } t| |tS rv   )r  r  r  r  r   r   r   r  B  s    c                 C  s   | | S rv   r   r  r   r   r   _prod_combineL  s    r  cumprodc                 C  s   t | } t| |tS rv   )r  r  r  r  r   r   r   r  Q  s    c                 C  s
   t | S )zA
    Insert a barrier to synchronize all threads in a block.
    )r   debug_barrierr   r   r   r   r  ]  s    r  c                 C  s~   t |tr|g}t|D ]J\}}t |ts:td| dt |jtstd| dt|j dqdd |D }t| |S )ze
    Let the compiler knows that the values in :code:`input` are all multiples of :code:`value`.
    values element r  r  r   c                 S  s   g | ]
}|j qS r   r   r  r   r   r   r   q  s     zmultiple_of.<locals>.<listcomp>)	r)   r6   r  r   r8   r,   r9   r   multiple_ofr"  valuesr   r  r  r   r   r   r  e  s    

r  c                 C  s~   t |tr|g}t|D ]J\}}t |ts:td| dt |jtstd| dt|j dqdd |D }t| |S )z_
    Let the compiler knows that the `value` first values in :code:`input` are contiguous.
    r  r  r  r   c                 S  s   g | ]
}|j qS r   r   r  r   r   r   r     s     z"max_contiguous.<locals>.<listcomp>)	r)   r6   r  r   r8   r,   r9   r   max_contiguousr  r   r   r   r  u  s    

r  c                 C  s~   t |tr|g}t|D ]J\}}t |ts:td| dt |jtstd| dt|j dqdd |D }t| |S )z
    Let the compiler knows that the `value` first values in :code:`input` are constant.

    e.g. if :code:`values` is [4], then each group of 4 values in :code:`input` should all be equal,
    for example [0, 0, 0, 0, 1, 1, 1, 1].
    r  r  r  r   c                 S  s   g | ]
}|j qS r   r   r  r   r   r   r     s     z!max_constancy.<locals>.<listcomp>)	r)   r6   r  r   r8   r,   r9   r   max_constancyr  r   r   r   r    s    

r   
)sepr  fileflushr   )r  r  c                 G  s   dS )a{  
    Print the values at compile time.  The parameters are the same as the builtin :code:`print`.

    NOTE: Calling the Python builtin :code:`print` is not the same as calling this, it instead maps to :code:`device_print`,
    which has special requirements for the arguments.

    .. highlight:: python
    .. code-block:: python

        tl.static_print(f"{BLOCK_SIZE=}")
    Nr   )r  r  r  r  r   r  r   r   r   static_print  s    r  c                 C  s   dS )z
    Assert the condition at compile time.  Does not require that the :code:`TRITON_DEBUG` environment variable
    is set.

    .. highlight:: python
    .. code-block:: python

        tl.static_assert(BLOCK_SIZE == 1024)
    Nr   )condmsgr   r   r   r   static_assert  s    r  r   c                G  s   ddl }t| } t| ts(t|  dd}| D ]}||jkr0d} qHq0|sZt|  dg }|D ]}|t|| qbt	| ||S )a  
    Print the values at runtime from the device.  String formatting does not work for runtime values, so you should
    provide the values you want to print as arguments.  The first value must be a string, all following values must
    be scalars or tensors.

    Calling the Python builtin :code:`print` is the same as calling this function, and the requirements for the arguments will match
    this function (not the normal requirements for :code:`print`).

    .. highlight:: python
    .. code-block:: python

        tl.device_print("pid", pid)
        print("pid", pid)

    :param prefix: a prefix to print before the values. This is required to be a string literal.
    :param args: the values to print. They can be any tensor or scalar.
    r   Nz is not stringTFz is not an ascii string)
stringr  r)   r   r   	printableappendr7   r   device_print)prefixr   r   r  Zb_asciichnew_argsrq  r   r   r   r    s    
r  c           	      C  s   t |}ddl}| }||}t|dr>|j}||}q"d}d}d}|dk	rl|jj}|jjj}|jj	}t
t| ||||||S )a(  
    Assert the condition at runtime from the device.  Requires that the environment variable :code:`TRITON_DEBUG`
    is set to a value besides :code:`0` in order for this to have any effect.

    Using the Python :code:`assert` statement is the same as calling this function, except that the second argument
    must be provided and must be a string, e.g. :code:`assert pid == 0, "pid != 0"`.  The environment variable must
    be set for this :code:`assert` statement to have any effect.

    .. highlight:: python
    .. code-block:: python

        tl.device_assert(pid == 0)
        assert pid == 0, f"pid != 0"

    :param cond: the condition to assert. This is required to be a boolean tensor.
    :param msg: the message to print if the assertion fails. This is required to be a string literal.
    r   NrI   unknown)r  inspectcurrentframe	getmodulehasattrf_backf_codeco_nameco_filenamef_linenor   device_assertr7   )	r  r  r   r  framemodulelineno	func_name	file_namer   r   r   r    s    


r  c                   @  s*   e Zd ZdZd	ddZdd Zdd ZdS )
static_rangea  
    Iterator that counts upward forever.

    .. highlight:: python
    .. code-block:: python

        @triton.jit
        def kernel(...):
            for i in tl.static_range(10):
                ...
    :note: This is a special iterator used to implement similar semantics to Python's :code:`range` in the context of
        :code:`triton.jit` functions. In addition, it also guides the compiler to unroll the loop aggressively.
    :param arg1: the start value.
    :param arg2: the end value.
    :param step: the step value.
    Nc                 C  sn   t |tst|d kr"td| _nt |ts0t|| _|d krPtd| _|| _nt |ts^t|| _|| _d S )Nr   r   )r)   r6   r   r  r	  r  )rb   Zarg1Zarg2r  r   r   r   rc     s    
zstatic_range.__init__c                 C  s   t dd S Nz8static_range can only be used in @triton.jit'd functionsr~   re   r   r   r   __iter__+  s    zstatic_range.__iter__c                 C  s   t dd S r  r~   re   r   r   r   __next__.  s    zstatic_range.__next__)NN)rI   rJ   rK   r   rc   r  r  r   r   r   r   r  	  s   
r  r.  dicttuple)lib_namelib_pathr   arg_type_symbol_dict	ret_shapeis_purec              	   C  s  t |dkrtdt t| d }t ||krLtdt | d| g }	g }
|D ]@}t|tr|	|j |
|j qX|	t	| |
| qXt
|	}	|	|krtd|  d|	 nD||	 d }||	 d }|rt||}t| ||||
||||S dS )a  
        Dispatch a function to a library
        :param func: the function to dispatch
        :param lib_name: the name of the library
        :param lib_path: the path of the library
        :param args: the arguments of the function
        :param arg_type_symbol_dict: the type of the arguments
        :param ret_shape: the shape of the return value
        :param _builder: the builder
        :return: the return value of the function
    r   zarg_type_symbol_dict is emptyz+length of input args does not match.Expect z, got z,input arg type does not match.Expect one of r   N)r/  r   r.  keysr)   r*   r  r<   r   r9   r  r   r   )rC  r  r  r   r  r  r  r   num_args	arg_typesarg_listrq  symbolr   r   r   r   dispatch6  s*    

r  )r  r  r   r  r  c              	   C  s  |  }d}d}g }	tt|D ]8}
t||
 |||
< |	||
 j ||
 j r d}q t|	dkrt|	}	d}|	|kr~d}|d }t	|D ]\}
}t
j||||d\}}qtt|D ]"}
t
j||
 |||d\||
< }q|s|j}t|d}t|| ||||||S )a  
        Dispatch an elementwise function to a library
        :param lib_name: the name of the library
        :param lib_path: the path of the library
        :param args: the arguments of the function
        :param arg_type_symbol_dict: the type of the arguments
        :param is_pure: whether the function is pure
        :param _builder: the builder
        :return: the return value of the function
    TNFr   )arithmetic_checkZcreate_extern_elementwise)copyr  r/  r7   r  r<   r9   r   r  r  r   Zbinary_op_type_checking_implr   r#   r  )r  r  r   r  r  r   Zdispatch_argsZ
all_scalarr  r  r  r  Zbroadcast_argitemr  rC  r   r   r   extern_elementwise`  sB          
r  c                 C  s   t | S )z#A decorator for external functions.)r    r   r   r   r   extern  s    r  )N)N)N)N)N)N)N)FN)N)N)N)Nr   r4  r4  N)N)N)NN)NNN)NNN)NNN)NNN)NNN)NNN)NNN)N)N)FN)N)N)N)N)N)N)NN)NN)N)NN)NFT)T)NFT)T)N)NNN)NN)NN)r   )r   )N)N)N)N)r4  N)r4  N)N)N)
__future__r   
contextlibr   enumr   	functoolsr   typingr   r   r   r	   Z_C.libtriton.tritonr   Zruntime.jitr   r4  r   r   r   r   r   r    r$   r7   r<   r   r   r   rH   r+   r=   r>   r-   r/   r?   r@   r.   r0   Zfloat8e5Zfloat8e4Zfloat8e4b15Zfloat16r  r4   r5   Zpi32_tr6   r*   r  r  r  r  r   r!  r#  r%  r&  r'  r(  r)  r+  r  r3  r  r5  r;  r=  rA  rI  rJ  rN  rP  rR  rT  rU  rV  rW  rX  r[  r\  r]  r^  r`  ra  rb  rc  r'   rh  rj  r  r  r  r  r  r  r  r  r  rQ  r  r  r  r  r  rS  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   <module>   s  " ?"2{ v
  .$

!












"

 !--*)