U
    0-ewZ                     @   s  d Z ddlZddlmZmZ ddlmZmZ ddlZddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ eddrddlm  mZ e rddlmZ dd Zdd Zdd Zdd Zdd ZeddddZdTddZdd Z dd  Z!d!d" Z"d#d$ Z#d%d& Z$d'd( Z%d)d* Z&G d+d, d,e'Z(d-d. Z)d/d0 Z*e)d1d2 Z+ed3d4d5Z,ed3d6d7Z-dUd8d9Z.dVd;d<Z/e)dWe0d=d>d?Z1dXe0d=d@dAZ2dYdBdCZ3dZdDdEZ4e*d[dFdGZ5e)d\dJdKZ6dLdM Z7G dNdO dOZ8dPdQ Z9dRdS Z:dS )]zB
A set of basic tensor ops compatible with tpu, gpu, and multigpu
    N)update_wrapperwraps)AnyMapping   )PartialState   )!TORCH_DISTRIBUTED_OPERATION_TYPES)DistributedTypeTensorInformation)is_torch_distributed_availableis_tpu_availableF)Zcheck_device)ReduceOpc                 C   s   t | tjS N)
isinstancetorchTensortensor r   \/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/accelerate/utils/operations.pyis_torch_tensor'   s    r   c              	   C   s2   t | tjjtjjtjjtjjtjjtjjtjj	S r   )
r   r   ZxpuZFloatTensorZ
ByteTensorZ	IntTensorZ
LongTensorZ
HalfTensorZDoubleTensorZBFloat16Tensorr   r   r   r   is_torch_xpu_tensor+   s    r   c                 C   s
   t | tS r   )r   r   Ztensor_infor   r   r   is_tensor_information8   s    r   c                 C   sV   t | }|j}t|dks&|d tkr*dS t|dd}t|tsDdS tdd |D S )z
    Checks if `x` is a `namedtuple` or not. Can have false positives, but only if a user is trying to mimic a
    `namedtuple` perfectly.
    r   r   F_fieldsNc                 s   s   | ]}t |tV  qd S r   )r   str).0memberr   r   r   	<genexpr>H   s     z is_namedtuple.<locals>.<genexpr>)type	__bases__lentuplegetattrr   all)dataZ	data_typebasesfieldsr   r   r   is_namedtuple<   s    
r)   c                 C   s(   t | rt| t| S t| |S dS )zO
    Cast a generator to the same type as obj (list, tuple, or namedtuple)
    N)r)   r    list)obj	generatorr   r   r   
honor_typeK   s    r-   	test_typeerror_on_other_typec                   s   t |ttfr.t| fdd|D S t |tr^t| fdd| D S |rv|f S rtdt| dj dj d|S )	a9  
    Recursively apply a function on a data structure that is a nested list/tuple/dictionary of a given base type.

    Args:
        func (`callable`):
            The function to recursively apply.
        data (nested list/tuple/dictionary of `main_type`):
            The data on which to apply `func`
        *args:
            Positional arguments that will be passed to `func` when applied on the unpacked data.
        main_type (`type`, *optional*, defaults to `torch.Tensor`):
            The base type of the objects to which apply `func`.
        error_on_other_type (`bool`, *optional*, defaults to `False`):
            Whether to return an error or not if after unpacking `data`, we get on an object that is not of type
            `main_type`. If `False`, the function will leave objects of types different than `main_type` unchanged.
        **kwargs:
            Keyword arguments that will be passed to `func` when applied on the unpacked data.

    Returns:
        The same data structure as `data` with `func` applied to every object of type `main_type`.
    c                 3   s*   | ]"}t |f d V  qdS )r.   Nrecursively_apply)r   oargsr0   funckwargsr/   r   r   r   o   s     z$recursively_apply.<locals>.<genexpr>c                    s.   i | ]&\}}|t |f d qS )r.   r1   r   kvr4   r   r   
<dictcomp>x   s      z%recursively_apply.<locals>.<dictcomp>zUnsupported types (z) passed to `z?`. Only nested list/tuple/dicts of objects that are valid for `z` should be passed.)	r   r#   r*   r-   r   r    items	TypeError__name__)r6   r&   r/   r0   r5   r7   r   r4   r   r2   V   s(    	
r2   c                    s   t | ttfr*t|  fdd| D S t | trtt trFgndkrRg t|  fdd|  D S t| drz| j	 dW S  t
k
r   | 	  Y S X n| S dS )a  
    Recursively sends the elements in a nested list/tuple/dictionary of tensors to a given device.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to send to a given device.
        device (`torch.device`):
            The device to send the data to.

    Returns:
        The same data structure as `tensor` with all tensors sent to the proper device.
    c                 3   s   | ]}t | d V  qdS )non_blocking	skip_keysNsend_to_device)r   tdevicer@   rA   r   r   r      s     z!send_to_device.<locals>.<genexpr>Nc              	      s.   i | ]&\}}||kr|nt | d qS )r?   rB   )r   r9   rD   rE   r   r   r;      s    z"send_to_device.<locals>.<dictcomp>to)r@   )r   r#   r*   r-   r   r   r    r<   hasattrrG   r=   )r   rF   r@   rA   r   rE   r   rC      s*     


rC   c                 C   s   dd }t || S )aK  
    Recursively gathers the information needed to rebuild a nested list/tuple/dictionary of tensors.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`):
            The data to send to analyze.

    Returns:
        The same data structure as `data` with [`~utils.TensorInformation`] instead of tensors.
    c                 S   s   t | j| jdS )N)shapedtype)r   rI   rJ   r   r   r   r   _get_data_structure   s    z/get_data_structure.<locals>._get_data_structurer1   )r&   rK   r   r   r   get_data_structure   s    rL   c                 C   s   dd }t || S )a:  
    Recursively gathers the shape of a nested list/tuple/dictionary of tensors as a list.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`):
            The data to send to analyze.

    Returns:
        The same data structure as `data` with lists of tensor shapes instead of tensors.
    c                 S   s
   t | jS r   )r*   rI   r   r   r   r   
_get_shape   s    zget_shape.<locals>._get_shaper1   )r&   rM   r   r   r   	get_shape   s    rN   c                 C   s   dd }t || tdS )z
    Recursively initializes tensors from a nested list/tuple/dictionary of [`~utils.TensorInformation`].

    Returns:
        The same data structure as `data` with tensors instead of [`~utils.TensorInformation`].
    c                 S   s   t j| jd| jiS NrJ   )r   emptyrI   rJ   r   r   r   r   _initialize_tensor   s    z.initialize_tensors.<locals>._initialize_tensorr/   )r2   r   )Zdata_structurerQ   r   r   r   initialize_tensors   s    rS   c                 C   sl   t | ttfrt| d S t | trB|  D ]}t| |   S n t | tjsbtdt	|  d| j
d S )a  
    Recursively finds the batch size in a nested list/tuple/dictionary of lists of tensors.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`): The data from which to find the batch size.

    Returns:
        `int`: The batch size.
    r   z0Can only find the batch size of tensors but got .)r   r#   r*   find_batch_sizer   keysr   r   r=   r    rI   )r&   r9   r   r   r   rU      s    

rU   c                 C   s   dd }t || S )aS  
    Recursively finds tensors in a nested list/tuple/dictionary and converts them to a list of numbers.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`): The data from which to convert to regular numbers.

    Returns:
        The same data structure as `data` with lists of numbers instead of `torch.Tensor`.
    c                 S   s,   |    } | jtjkr$| tj} |  S r   )detachcpurJ   r   bfloat16rG   Zfloat32tolistr   r   r   r   _convert_to_list   s    z!listify.<locals>._convert_to_listr1   )r&   r[   r   r   r   listify   s    	r\   c                 C   s"   dd }t || dd}t  |S )Nc                 S   s0   | j dkr|  d  } |  s&|  } t| S )Nr   )ndimcloneis_contiguous
contiguousxm
all_gatherr   r   r   r   _tpu_gather_one  s
    
z$_tpu_gather.<locals>._tpu_gather_oneTr0   )r2   ra   Z	mark_step)r   rc   resr   r   r   _tpu_gather  s    	rf   c                 C   s   dd }t || ddS )Nc                    s^    j dkr  d     s&    fddttj D }tj|  tj	|ddS )Nr   c                    s   g | ]}t  qS r   )r   Z
empty_liker   _r   r   r   
<listcomp>"  s     z8_gpu_gather.<locals>._gpu_gather_one.<locals>.<listcomp>dim)
r]   r^   r_   r`   ranger   distributedZget_world_sizerb   cat)r   Zoutput_tensorsr   r   r   _gpu_gather_one  s    
z$_gpu_gather.<locals>._gpu_gather_oneTrd   r1   )r   ro   r   r   r   _gpu_gather  s    rp   c                   @   s   e Zd ZdZdS )DistributedOperationExceptionz
    An exception class for distributed operations. Raised if the operation cannot be performed due to the shape of the
    tensors.
    N)r>   
__module____qualname____doc__r   r   r   r   rq   )  s   rq   c                    s   t   fdd}|S )zv
    Verifies that `tensor` is the same shape across all processes. Only ran if `PartialState().debug` is `True`.
    c                     s   t  jtjkst  js  | |S  j d j }d|krD|d }n| d }t|}t|g}|d d k	r|	|d t
|k}|sddd t|D }td| d|  | |S )	NrT   r   r   z
  - c                 S   s    g | ]\}}d | d| qS )zProcess z: r   )r   irI   r   r   r   ri   E  s     z5verify_operation.<locals>.wrapper.<locals>.<listcomp>znCannot apply desired operation due to shape mismatches. All shapes across devices must be valid.

Operation: `z`
Input shapes:
  - )r   distributed_typer
   NOdebugrr   r>   rN   gather_objectcountr"   join	enumeraterq   )r5   r7   	operationr   ZshapesoutputZare_sameZprocess_shape_strfunctionr   r   wrapper7  s     


z!verify_operation.<locals>.wrapperr   r   r   r   r   r   verify_operation2  s    r   c                    s   t   fdd}|S )z
    Checks that `verify_operation` failed and if so reports a more helpful error chaining the existing
    `DistributedOperationException`.
    c               
      sX   z | |W S  t k
rR } z( j d j }t d| d|W 5 d }~X Y nX d S )NrT   zError found while calling `z1`. Please see the earlier error for more details.)rq   rr   r>   )r5   r7   er}   r   r   r   r   V  s    
z"chained_operation.<locals>.wrapperr   r   r   r   r   chained_operationP  s    	r   c                 C   s2   t  jtjkrt| S t  jtkr*t| S | S dS )a4  
    Recursively gather tensor in a nested list/tuple/dictionary of tensors from all devices.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to gather.

    Returns:
        The same data structure as `tensor` with all tensors sent to the proper device.
    N)r   rv   r
   TPUrf   r	   rp   r   r   r   r   gatherc  s
    r   objectc                 C   s2   dd t t jD }tj||  dd |D S )Nc                 S   s   g | ]}d qS r   r   rg   r   r   r   ri   x  s     z&_gpu_gather_object.<locals>.<listcomp>c                 S   s   g | ]}|D ]}|qqS r   r   )r   yxr   r   r   ri   {  s       )rl   r   num_processesr   rm   Zall_gather_object)r   Zoutput_objectsr   r   r   _gpu_gather_objectw  s    r   c                 C   s4   t  jtjkrtdnt  jtkr,t| S | S dS )a5  
    Recursively gather object in a nested list/tuple/dictionary of objects from all devices.

    Args:
        object (nested list/tuple/dictionary of picklable object):
            The data to gather.

    Returns:
        The same data structure as `object` with all the objects sent to every device.
    z&gather objects in TPU is not supportedN)r   rv   r
   r   NotImplementedErrorr	   r   r   r   r   r   ry   ~  s
    
ry   c                 C   s   ddd}t || d|dS )Nr   c                 S   s   t jj| |d | S )Nsrc)r   rm   	broadcast)r   r   r   r   r   _gpu_broadcast_one  s    z*_gpu_broadcast.<locals>._gpu_broadcast_oneT)r0   r   )r   r1   )r&   r   r   r   r   r   _gpu_broadcast  s    
r   broadcast tensorc                    sh   t | ttfr*t|  fddt| D S t | trRt|  fdd|  D S t	 | fddS )Nc                 3   s(   | ] \}}t |  d | dV  qdS )rh   nameN_tpu_broadcast)r   ru   rD   r   r   r   r     s     z!_tpu_broadcast.<locals>.<genexpr>c                    s(   i | ] \}}|t |  d | dqS )rh   r   r   r8   r   r   r   r;     s      z"_tpu_broadcast.<locals>.<dictcomp>c                    s   |   S r   r   r   r   r   r   <lambda>      z _tpu_broadcast.<locals>.<lambda>)
r   r*   r#   r-   r|   r   r    r<   ra   mesh_reduce)r   r   r   r   )r   r   r   r     s
    
r   from_processc                 C   s<   t  jtjkrt| |ddS t  jtkr4t| |dS | S dS )a  
    Recursively broadcast tensor in a nested list/tuple/dictionary of tensors to all devices.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to gather.
        from_process (`int`, *optional*, defaults to 0):
            The process from which to send the data

    Returns:
        The same data structure as `tensor` with all tensors broadcasted to the proper device.
    zaccelerate.utils.broadcast)r   r   r   N)r   rv   r
   r   r   r	   r   )r   r   r   r   r   r     s
    r   c                    s\   t  jtjkr<t| D ]"\}}td| fdd| |< qnt  jtkrXtj	j
|  d | S )a  
    Broadcast a list of picklable objects form one process to the others.

    Args:
        object_list (list of picklable objects):
            The list of objects to broadcast. This list will be modified inplace.
        from_process (`int`, *optional*, defaults to 0):
            The process from which to send the data.

    Returns:
        The same list containing the objects from process 0.
    z&accelerate.utils.broadcast_object_listc                    s   |   S r   r   r   r   r   r   r     r   z'broadcast_object_list.<locals>.<lambda>r   )r   rv   r
   r   r|   ra   r   r	   r   rm   broadcast_object_list)Zobject_listr   ru   r+   r   r   r   r     s    r   c                 C   s   dd }t || |S )aN  
    Recursively takes a slice in a nested list/tuple/dictionary of tensors.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`):
            The data to slice.
        tensor_slice (`slice`):
            The slice to take.

    Returns:
        The same data structure as `data` with all the tensors slices.
    c                 S   s   | | S r   r   )r   tensor_slicer   r   r   _slice_tensor  s    z$slice_tensors.<locals>._slice_tensorr1   )r&   r   Zprocess_indexr   r   r   r   r   slice_tensors  s    r   c                    s   t  d ttfr<t d  fddtt d D S t  d trrt d  fdd d  D S t  d t	j
stdt d  t	j dS )a  
    Recursively concatenate the tensors in a nested list/tuple/dictionary of lists of tensors with the same shape.

    Args:
        data (nested list/tuple/dictionary of lists of tensors `torch.Tensor`):
            The data to concatenate.
        dim (`int`, *optional*, defaults to 0):
            The dimension on which to concatenate.

    Returns:
        The same data structure as `data` with all the tensors concatenated.
    r   c                 3   s(   | ]  t  fd dD dV  qdS )c                    s   g | ]}|  qS r   r   r   dru   r   r   ri     s     z)concatenate.<locals>.<genexpr>.<listcomp>rj   Nconcatenater   r&   rk   r   r   r     s     zconcatenate.<locals>.<genexpr>c                    s(   i | ]   t  fd dD dqS )c                    s   g | ]}|  qS r   r   r   r9   r   r   ri     s     z*concatenate.<locals>.<dictcomp>.<listcomp>rj   r   r   r   r   r   r;     s      zconcatenate.<locals>.<dictcomp>z%Can only concatenate tensors but got rj   )r   r#   r*   r-   rl   r"   r   r    rV   r   r   r=   rn   r   r   r   r   r     s    *(r   c                 C   s   ddd}t || d|||dS )a3  
    Recursively pad the tensors in a nested list/tuple/dictionary of tensors from all devices to the same size so they
    can safely be gathered.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to gather.
        dim (`int`, *optional*, defaults to 0):
            The dimension on which to pad.
        pad_index (`int`, *optional*, defaults to 0):
            The value with which to pad.
        pad_first (`bool`, *optional*, defaults to `False`):
            Whether to pad at the beginning or the end.
    r   Fc           	         s    t | jkr| S tj| j| jdd  }t| }t fdd|D | j  kr\| S | jt}| < | 	t
|| }|rt
 fddtt |D }n t
 fddtt |D }| ||< |S )N)rF   c                 3   s   | ]}|  V  qd S r   r   )r   srj   r   r   r     s     zFpad_across_processes.<locals>._pad_across_processes.<locals>.<genexpr>c                 3   s0   | ](}| kr t    nt d V  qd S r   slicer   ru   rk   max_sizeold_sizer   r   r     s    c                 3   s,   | ]$}| krt d   nt dV  qdS )r   Nr   r   )rk   r   r   r   r     s     )r"   rI   r   r   rF   r   rX   maxr*   Z	new_zerosr#   rl   )	r   rk   	pad_index	pad_firstsizesizesnew_sizeZ
new_tensorindicesr   r   r   _pad_across_processes  s$    

 z3pad_across_processes.<locals>._pad_across_processesT)r0   rk   r   r   )r   r   Fr1   )r   rk   r   r   r   r   r   r   pad_across_processes  s    
     r   mean      ?c                 C   s   ddd}t || d||dS )aX  
    Recursively reduce the tensors in a nested list/tuple/dictionary of lists of tensors across all processes by the
    mean of a given operation.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to reduce.
        reduction (`str`, *optional*, defaults to `"mean"`):
            A reduction method. Can be of "mean", "sum", or "none"
        scale (`float`, *optional*):
            A default scaling value to be applied after the reduce, only valied on XLA.

    Returns:
        The same data structure as `data` with all the tensors reduced.
    r   r   c                 S   sl   t  }|  }|jtjkr|S |jtjkr:td|| n|jjt	krVt
j|tj |dkrh||j }|S )Nsumr   )r   r^   rv   r
   rw   r   ra   Z
all_reducevaluer	   r   rm   r   ZSUMr   )r   	reductionscalestateZcloned_tensorr   r   r   _reduce_across_processes7  s    
z(reduce.<locals>._reduce_across_processesT)r0   r   r   )r   r   r1   )r   r   r   r   r   r   r   reduce%  s    
    r   c                 C   s   dd }dd }t || |dS )av  
    Recursively converts the elements nested list/tuple/dictionary of tensors in FP16/BF16 precision to FP32.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to convert from FP16/BF16 to FP32.

    Returns:
        The same data structure as `tensor` with all tensors that were in FP16/BF16 precision converted to FP32.
    c                 S   s   |   S r   )floatr   r   r   r   _convert_to_fp32U  s    z)convert_to_fp32.<locals>._convert_to_fp32c                 S   s   t | do| jtjtjfkS rO   )rH   rJ   r   Zfloat16rY   r   r   r   r   _is_fp16_bf16_tensorX  s    z-convert_to_fp32.<locals>._is_fp16_bf16_tensorrR   r1   )r   r   r   r   r   r   convert_to_fp32I  s    r   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	ConvertOutputsToFp32ad  
    Decorator to apply to a function outputing tensors (like a model forward pass) that ensures the outputs in FP16
    precision will be convert back to FP32.

    Args:
        model_forward (`Callable`):
            The function which outputs we want to treat.

    Returns:
        The same function as `model_forward` but with converted outputs.
    c                 C   s   || _ t| | d S r   )model_forwardr   )selfr   r   r   r   __init__k  s    zConvertOutputsToFp32.__init__c                 O   s   t | j||S r   )r   r   )r   r5   r7   r   r   r   __call__o  s    zConvertOutputsToFp32.__call__c                 C   s   t dd S )NzCannot pickle a prepared model with automatic mixed precision, please unwrap the model with `Accelerator.unwrap_model(model)` before pickling it.)picklePicklingError)r   r   r   r   __getstate__r  s    z!ConvertOutputsToFp32.__getstate__N)r>   rr   rs   rt   r   r   r   r   r   r   r   r   ^  s   r   c                    s   t    fdd} |_|S )Nc                     s
    | |S r   r   )r5   r7   r   r   r   forward{  s    z(convert_outputs_to_fp32.<locals>.forward)r   __wrapped__)r   r   r   r   r   convert_outputs_to_fp32x  s    r   c                 C   sz   t | tr2|  D ]}t|}|dk	r|  S qnDt | ttfrd| D ]}t|}|dk	rD|  S qDnt | tjrv| jS dS )z
    Finds the device on which a nested dict/list/tuple of tensors lies (assuming they are all on the same device).

    Args:
        (nested list/tuple/dictionary of `torch.Tensor`): The data we want to know the device of.
    N)	r   r   valuesfind_devicer#   r*   r   r   rF   )r&   r+   rF   r   r   r   r     s    
r   )FN)r   )r   r   )r   )r   )NN)r   )r   r   F)r   r   );rt   r   	functoolsr   r   typingr   r   r   r   r   	constantsr	   dataclassesr
   r   Zimportsr   r   Ztorch_xla.core.xla_modelcoreZ	xla_modelra   Ztorch.distributedr   r   r   r   r)   r-   r2   rC   rL   rN   rS   rU   r\   rf   rp   	Exceptionrq   r   r   r   r   ry   r   r   intr   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s`   
3
%	




.#