U
    9%e5                     @   s   d dl Zd dlZd dlmZ d dlmZ ddlmZm	Z	m
Z
 e	 rjd dlZd dlZd dlmZ ddlmZ e rd dlmZ d dlmZ e
eZdd	d
ZdddZdddZdd Zdd Zdd ZdS )    N)deepcopy)version   )is_accelerate_availableis_bitsandbytes_availablelogging)Conv1D)init_empty_weights)find_tied_parametersc                 C   s  d|krV| d}|dd D ].}t| |}|dkrHt|  d| d|} q|d }|| jkr~|| jkr~t|  d| d|| jk}t| |}	|	jtdkr|dtdfkr|dkrt| d| dd}
d}|st sd}d}
n6tt	j
d	ot| j| t	j
j}
t| j| t	j
j}|s.|
rT| j| }|jjd
kr|dkr\|	|}n`t|tjr|d}|jtjkrttjdtdk}|stdntj|dd}t| jtr|dkr|j}|	j}|rt	j
j|fddi||}n$|
r*t	j
j|fddi||}|| j|< |dk	rt| jd|| nj|dkrj|	|}n(t|tjr||}ntj||d}|r|| j|< nt
j||	j d}|| j|< dS )a  
    A helper function to set a given tensor (parameter of buffer) of a module on a specific device (note that doing
    `param.to(device)` creates a new tensor not linked to the parameter, which is why we need this function). The
    function is adapted from `set_module_tensor_to_device` function from accelerate that is adapted to support the
    class `Int8Params` from `bitsandbytes`.

    Args:
        module (`torch.nn.Module`):
            The module in which the tensor we want to move lives.
        tensor_name (`str`):
            The full name of the parameter/buffer.
        device (`int`, `str` or `torch.device`):
            The device on which to set the tensor.
        value (`torch.Tensor`, *optional*):
            The value of the tensor (useful when going from the meta device to any other device).
        fp16_statistics (`torch.HalfTensor`, *optional*):
            The list of fp16 statistics to set on the module, used for serialization.
    .Nz has no attribute z- does not have a parameter or a buffer named metaz7 is on the meta device, we need a `value` to put in on F
Params4bitcudacpubitsandbytesz0.37.2zDetected int8 weights but the version of bitsandbytes is not compatible with int8 serialization. Make sure to download the latest `bitsandbytes` version. `pip install --upgrade bitsandbytes`.)devicerequires_gradZSCB)r   )!splitgetattr
ValueError_parameters_buffersr   torchr   hasattrbnbnn
isinstancer   Z
Int8ParamstypetoZTensorZdtypeZint8r   parse	importlibmetadataZtensor
issubclass
source_clsr   T__dict__setattrweight	Parameterr   )moduleZtensor_namer   valueZfp16_statisticsZsplitsr   Z
new_moduleZ	is_buffer	old_valueZis_4bitZis_8bitparam	new_valueZis_8bit_serializablekwargs r0   e/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/transformers/integrations/bitsandbytes.py%set_module_quantized_tensor_to_device   sp    



*
"


 


r2   Fc           
   
      st  |   D ]`\}} dkrg   | t|tjs@t|tr4||kr4t fdd|D s4t  t|tr|jj	\}}n|j
}|j}| dkrtjj|||jdk	|j|jd| j|< d}nD|jdk	r||jkrn.tjj|||jdk	|j|j|jd| j|< d}t|| j| _| j| d W 5 Q R X tt| d	kr`t|| ||d
\}	} d q| |fS )z
    Private method that wraps the recursion for module replacement.

    Returns the converted model and a boolean that indicates if the conversion has been successfull or not.
    Nc                 3   s   | ]}|d   kV  qdS )r   N)join).0keycurrent_key_namer0   r1   	<genexpr>   s     z+_replace_with_bnb_linear.<locals>.<genexpr>Zllm_int8)Zhas_fp16_weights	thresholdT)Zcompress_statisticsZ
quant_typeFr   )has_been_replacedr   ) Znamed_childrenappendr   r   ZLinearr   anyr	   r(   shapein_featuresout_featuresZquantization_methodr   ZLinear8bitLtZbiasZllm_int8_has_fp16_weightZllm_int8_thresholdZ_modulesZllm_int8_skip_modulesZ
Linear4bitZbnb_4bit_compute_dtypeZbnb_4bit_use_double_quantZbnb_4bit_quant_typer   r$   Zrequires_grad_lenlistchildren_replace_with_bnb_linearpop)
modelmodules_to_not_convertr7   quantization_configr:   namer*   r>   r?   _r0   r6   r1   rC   w   s\    
"

rC   c                 C   s6   |dkrdgn|}t | |||\} }|s2td | S )a6  
    A helper function to replace all `torch.nn.Linear` modules by `bnb.nn.Linear8bit` modules from the `bitsandbytes`
    library. This will enable running your models using mixed int8 precision as described by the paper `LLM.int8():
    8-bit Matrix Multiplication for Transformers at Scale`. Make sure `bitsandbytes` compiled with the correct CUDA
    version of your hardware is installed before running this function. `pip install -i https://test.pypi.org/simple/
    bitsandbytes`

    The function will be run recursively and replace all `torch.nn.Linear` modules except for the `lm_head` that should
    be kept as a `torch.nn.Linear` module. The replacement is done under `init_empty_weights` context manager so no
    CPU/GPU memory is required to run this function. Int8 mixed-precision matrix decomposition works by separating a
    matrix multiplication into two streams: (1) and systematic feature outlier stream matrix multiplied in fp16
    (0.01%), (2) a regular stream of int8 matrix multiplication (99.9%). With this method, int8 inference with no
    predictive degradation is possible for very large models (>=176B parameters).

    Parameters:
        model (`torch.nn.Module`):
            Input model or `torch.nn.Module` as the function is run recursively.
        modules_to_not_convert (`List[`str`]`, *optional*, defaults to `["lm_head"]`):
            Names of the modules to not convert in `Linear8bitLt`. In practice we keep the `lm_head` in full precision
            for numerical stability reasons.
        current_key_name (`List[`str`]`, *optional*):
            An array to track the current key of the recursion. This is used to check whether the current key (part of
            it) is not in the list of modules to not convert (for instances modules that are offloaded to `cpu` or
            `disk`).
    NZlm_headzYou are loading your model in 8bit or 4bit but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)rC   loggerwarning)rE   rF   r7   rG   r:   r0   r0   r1   replace_with_bnb_linear   s       rL   c                  O   s   t dt t| |S )Nzj`replace_8bit_linear` will be deprecated in a future version, please use `replace_with_bnb_linear` instead)warningswarnFutureWarningrL   argsr/   r0   r0   r1   replace_8bit_linear   s
    rR   c                  O   s   t dt t| |S )Nz`set_module_8bit_tensor_to_device` will be deprecated in a future version, please use `set_module_quantized_tensor_to_device` instead)rM   rN   rO   r2   rP   r0   r0   r1    set_module_8bit_tensor_to_device   s
    rS   c                    s  t | }|  t|}t|trBtt| g t|  }n
t|g }t	|dk}|s| 
   dk	r fdd|  D }|S t|  }|d d g}t|t| }tt|t| }ddg}	g }
|D ],}|	D ]}||kr||d}q|
| q|
S )	a  
    An utility function to get the key of the module to keep in full precision if any For example for CausalLM modules
    we may want to keep the lm_head in full precision for numerical stability reasons. For other architectures, we want
    to keep the tied weights of the model. The function will return a list of the keys of the modules to not convert in
    int8.

    Parameters:
    model (`torch.nn.Module`):
        Input model
    r   Nc                    s$   g | ]\}}t |t  kr|qS r0   )id)r4   rH   r*   Z
output_embr0   r1   
<listcomp>  s      z+get_keys_to_not_convert.<locals>.<listcomp>r   z.weightz.bias )r   Ztie_weightsr
   r   dictsumrA   valueskeysr@   Zget_output_embeddingsZnamed_modulesZnamed_parameterssetreplacer;   )rE   Z
tied_modelZtied_paramsZ	tied_keysZhas_tied_paramsZlist_last_moduleZlist_modulesintersectionZlist_untouchedZnames_to_removeZfiltered_module_namesrH   Zname_to_remover0   rU   r1   get_keys_to_not_convert   s0    
 
r_   )NN)NNNF)NNN)importlib.metadatar!   rM   copyr   	packagingr   utilsr   r   r   r   r   r   Ztorch.nnr   Zpytorch_utilsr   Z
accelerater	   Zaccelerate.utilsr
   Z
get_logger__name__rJ   r2   rC   rL   rR   rS   r_   r0   r0   r0   r1   <module>   s.   

`       
A
*	