U
    9%eN.                     @   sh   d dl Z d dlZd dlZddlmZ ddlmZ ddlmZ dd Z	dd	 Z
d
d Zdd Zdd ZdS )    N   )_build)get_cache_manager)version_keyc                  C   s   dd l } | jjd k	S )Nr   )torchversionZhip)r    r   \/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/triton/compiler/make_launcher.pyis_hip
   s    r
   c                 C   sF   dd |  D }|  dd|  | }t|d }|S )Nc                 S   s&   i | ]\}}||d  dkrdn|qS )r   *Zptrr   ).0kvr   r   r	   
<dictcomp>   s      z%make_so_cache_key.<locals>.<dictcomp>- zutf-8)itemsjoinvalueshashlibmd5encode	hexdigest)Zversion_hash	signature	constantskeyr   r   r	   make_so_cache_key   s    r   c                 C   s   t t ||}t|}|  d}||}|d krt }t||}tj	|d}	t
|	d}
|
| W 5 Q R X t| |	|}t
|d.}
|j|
 |ddW  5 Q R  W  5 Q R  S Q R X W 5 Q R X n|S d S )Nz.sozmain.cwrbT)binary)r   r   r   Zget_filetempfileTemporaryDirectorygenerate_launcherospathr   openwriter   putread)namer   r   Zso_cache_keyZso_cache_managerZso_name
cache_pathZtmpdirsrcZsrc_pathfsor   r   r	   	make_stub   s    



>r.   c                 C   s<   | d dkrt  rdS dS dddddd	d
dddddd|  S )Nr   r   ZhipDeviceptr_tZCUdeviceptrint32_tZint8_tZint16_tint64_tuint32_tuint64_tfloatdouble)i1i8Zi16i32i64u32u64fp16bf16fp32f32fp64)r
   tyr   r   r	   	ty_to_cpp0   s"    rB   c                    s  d dd | D }dd  dd dd	  fd
d| D  }t rd| dd fdd| D  dd  fdd| D  d| dd dd | D  dd dd | D  dd dd | D  d}nd| dd fdd| D  dd  fdd| D  d | dd d!d | D  d"d d#d | D  d$d d%d | D  d&}|S )'N, c                 s   s$   | ]\}}t | d | V  qdS )z argN)rB   r   irA   r   r   r	   	<genexpr>D   s     z$generate_launcher.<locals>.<genexpr>c                 S   s.   | d dkrdS dddddddddd	d

|  S )Nr   r   	PyObject*r/   r0   r1   r2   r3   r4   )
r5   r7   r8   r9   r:   r;   r<   r=   r>   r?   r   r@   r   r   r	   _extracted_typeF   s    z*generate_launcher.<locals>._extracted_typec              	   S   s   ddddddddd	|  S )
NOr,   dlIrE   KL)rG   r3   r4   longr1   r/   r2   r0   r   r@   r   r   r	   	format_ofV   s    	z$generate_launcher.<locals>.format_ofZ
iiiiiKKOOOr   c                    s   g | ]} |qS r   r   )r   rA   )rH   rP   r   r	   
<listcomp>b   s     z%generate_launcher.<locals>.<listcomp>a  
    #define __HIP_PLATFORM_AMD__
    #include <hip/hip_runtime.h>
    #include <Python.h>
    #include <stdio.h>

    static inline void gpuAssert(hipError_t code, const char *file, int line)
    {
      if (code != HIP_SUCCESS)
      {
         const char* prefix = "Triton Error [HIP]: ";
         const char* str = hipGetErrorString(code);
         char err[1024] = {0};
         snprintf(err, 1024, "%s Code: %d, Messsage: %s", prefix, code, str );
         PyErr_SetString(PyExc_RuntimeError, err);
      }
    }

    #define HIP_CHECK(ans) { gpuAssert((ans), __FILE__, __LINE__); }

    static void _launch(int gridX, int gridY, int gridZ, int num_warps, int shared_memory, hipStream_t stream, hipFunction_t function, z) {
      void *params[] = { c                 3   s    | ]}| krd | V  qdS z&argNr   r   rE   r   r   r	   rF   {   s      a   };
      if (gridX*gridY*gridZ > 0) {
          HIP_CHECK(hipModuleLaunchKernel(function, gridX, gridY, gridZ, 64*num_warps, 1, 1, shared_memory, stream, params, 0));
      }
    }

    typedef struct _DevicePtrInfo {
      hipDeviceptr_t dev_ptr;
      bool valid;
    } DevicePtrInfo;

    static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {
      DevicePtrInfo ptr_info;
      ptr_info.dev_ptr = 0;
      ptr_info.valid = true;

      if (PyLong_Check(obj)) {
        ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(obj);
        return ptr_info;
      }

      if (obj == Py_None) {
        // valid nullptr
        return ptr_info;
      }

      PyObject *ptr = PyObject_GetAttrString(obj, "data_ptr");

      if (ptr) {
        PyObject *empty_tuple = PyTuple_New(0);
        PyObject *ret = PyObject_Call(ptr, empty_tuple, NULL);
        Py_DECREF(empty_tuple);
        Py_DECREF(ptr);

        if (!PyLong_Check(ret)) {
          PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
          ptr_info.valid = false;
          return ptr_info;
        }

        ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);

        if (!ptr_info.dev_ptr)
          return ptr_info;

        uint64_t dev_ptr;
        hipError_t status = hipPointerGetAttribute(&dev_ptr, HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
        if (status == hipErrorInvalidValue) {
            PyErr_Format(PyExc_ValueError,
                         "Pointer argument (at %d) cannot be accessed from Triton (cpu tensor?)", idx);
            ptr_info.valid = false;
        }

        ptr_info.dev_ptr = (hipDeviceptr_t)dev_ptr;
        return ptr_info;
      }

      PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
      return ptr_info;
    }

    static PyObject* launch(PyObject* self, PyObject* args) {

      int gridX, gridY, gridZ;
      uint64_t _stream;
      uint64_t _function;
      int num_warps;
      int shared_memory;
      PyObject *launch_enter_hook = NULL;
      PyObject *launch_exit_hook = NULL;
      PyObject *compiled_kernel = NULL;

       c                    s$   g | ]\}} | d | dqS z _arg; r   rD   rH   r   r	   rQ      s     z$
      if (!PyArg_ParseTuple(args, "z", &gridX, &gridY, &gridZ, &num_warps, &shared_memory, &_stream, &_function, &launch_enter_hook, &launch_exit_hook, &compiled_kernel, c                 s   s   | ]\}}d | V  qdS z&_argNr   rD   r   r   r	   rF      s     z)) {
        return NULL;
      }

      if (launch_enter_hook != Py_None) {
        PyObject_CallObject(launch_enter_hook, args);
      }

      // raise exception asap
      rW   c                 S   s>   g | ]6\}}|d  dkr6d| d| d| d| d	ndqS r   r   zDevicePtrInfo ptr_infoz = getPointer(_argrC   z); if (!ptr_infoz.valid) return NULL;r   r   rD   r   r   r	   rQ      s     zo;
      _launch(gridX, gridY, gridZ, num_warps, shared_memory, (hipStream_t)_stream, (hipFunction_t)_function, c                 s   s4   | ],\}}|d  dkr"d| dnd| V  qdS r   r   Zptr_infoz.dev_ptrZ_argNr   rD   r   r   r	   rF      s     a5  );
      if (launch_exit_hook != Py_None) {
        PyObject_CallObject(launch_exit_hook, args);
      }
      if (PyErr_Occurred()) {
        return NULL;
      }

      // return None
      Py_INCREF(Py_None);
      return Py_None;
    }

    static PyMethodDef ModuleMethods[] = {
      {"launch", launch, METH_VARARGS, "Entry point for all kernels with this signature"},
      {NULL, NULL, 0, NULL} // sentinel
    };

    static struct PyModuleDef ModuleDef = {
      PyModuleDef_HEAD_INIT,
      "__triton_launcher",
      NULL, //documentation
      -1, //size
      ModuleMethods
    };

    PyMODINIT_FUNC PyInit___triton_launcher(void) {
      PyObject *m = PyModule_Create(&ModuleDef);
      if(m == NULL) {
        return NULL;
      }
      PyModule_AddFunctions(m, ModuleMethods);
      return m;
    }
    a_  
#include "cuda.h"
#include <stdbool.h>
#include <Python.h>

static inline void gpuAssert(CUresult code, const char *file, int line)
{
   if (code != CUDA_SUCCESS)
   {
      const char* prefix = "Triton Error [CUDA]: ";
      const char* str;
      cuGetErrorString(code, &str);
      char err[1024] = {0};
      strcat(err, prefix);
      strcat(err, str);
      PyErr_SetString(PyExc_RuntimeError, err);
   }
}

#define CUDA_CHECK(ans) { gpuAssert((ans), __FILE__, __LINE__); }

static void _launch(int gridX, int gridY, int gridZ, int num_warps, int shared_memory, CUstream stream, CUfunction function, z) {
  void *params[] = { c                 3   s    | ]}| krd | V  qdS rR   r   rS   rT   r   r	   rF     s      a   };
  if(gridX*gridY*gridZ > 0){
    CUDA_CHECK(cuLaunchKernel(function, gridX, gridY, gridZ, 32*num_warps, 1, 1, shared_memory, stream, params, 0));
  }
}

typedef struct _DevicePtrInfo {
    CUdeviceptr dev_ptr;
    bool valid;
} DevicePtrInfo;

static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {
  DevicePtrInfo ptr_info;
  ptr_info.dev_ptr = 0;
  ptr_info.valid = true;
  if (PyLong_Check(obj)) {
    ptr_info.dev_ptr = PyLong_AsUnsignedLongLong(obj);
    return ptr_info;
  }
  if (obj == Py_None) {
    // valid nullptr
    return ptr_info;
  }
  PyObject *ptr = PyObject_GetAttrString(obj, "data_ptr");
  if(ptr){
    PyObject *empty_tuple = PyTuple_New(0);
    PyObject *ret = PyObject_Call(ptr, empty_tuple, NULL);
    Py_DECREF(empty_tuple);
    Py_DECREF(ptr);
    if (!PyLong_Check(ret)) {
      PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
      ptr_info.valid = false;
      return ptr_info;
    }
    ptr_info.dev_ptr = PyLong_AsUnsignedLongLong(ret);
    if(!ptr_info.dev_ptr)
      return ptr_info;
    uint64_t dev_ptr;
    int status = cuPointerGetAttribute(&dev_ptr, CU_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
    if (status == CUDA_ERROR_INVALID_VALUE) {
        PyErr_Format(PyExc_ValueError,
                     "Pointer argument (at %d) cannot be accessed from Triton (cpu tensor?)", idx);
        ptr_info.valid = false;
    }
    ptr_info.dev_ptr = dev_ptr;
    Py_DECREF(ret);  // Thanks ChatGPT!
    return ptr_info;
  }
  PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
  return ptr_info;
}

static PyObject* launch(PyObject* self, PyObject* args) {
  int gridX, gridY, gridZ;
  uint64_t _stream;
  uint64_t _function;
  int num_warps;
  int shared_memory;
  PyObject *launch_enter_hook = NULL;
  PyObject *launch_exit_hook = NULL;
  PyObject *compiled_kernel = NULL;
  c                    s$   g | ]\}} | d | dqS rV   r   rD   rX   r   r	   rQ   E  s     z
  if(!PyArg_ParseTuple(args, "c                 s   s   | ]\}}d | V  qdS rY   r   rD   r   r   r	   rF   F  s     z)) {
    return NULL;
  }

  if (launch_enter_hook != Py_None) {
    PyObject_CallObject(launch_enter_hook, args);
  }


  // raise exception asap
  c                 S   s>   g | ]6\}}|d  dkr6d| d| d| d| d	ndqS rZ   r   rD   r   r   r	   rQ   P  s     ze;
  _launch(gridX, gridY, gridZ, num_warps, shared_memory, (CUstream)_stream, (CUfunction)_function, c                 s   s4   | ],\}}|d  dkr"d| dnd| V  qdS r[   r   rD   r   r   r	   rF   Q  s     a  );

  if (launch_exit_hook != Py_None) {
    PyObject_CallObject(launch_exit_hook, args);
  }

  if(PyErr_Occurred()) {
    return NULL;
  }
  // return None
  Py_INCREF(Py_None);
  return Py_None;
}

static PyMethodDef ModuleMethods[] = {
  {"launch", launch, METH_VARARGS, "Entry point for all kernels with this signature"},
  {NULL, NULL, 0, NULL} // sentinel
};

static struct PyModuleDef ModuleDef = {
  PyModuleDef_HEAD_INIT,
  "__triton_launcher",
  NULL, //documentation
  -1, //size
  ModuleMethods
};

PyMODINIT_FUNC PyInit___triton_launcher(void) {
  PyObject *m = PyModule_Create(&ModuleDef);
  if(m == NULL) {
    return NULL;
  }
  PyModule_AddFunctions(m, ModuleMethods);
  return m;
}
)r   r   r   r
   keys)r   r   Z	arg_declsformatr+   r   )rH   r   rP   r	   r"   C   sL    "]^^gh
 STT^_ r"   )r   r#   r    commonr   Zruntime.cacher   Zruntime.jitr   r
   r   r.   rB   r"   r   r   r   r	   <module>   s   