""" PyTorch GLPN model."""

import math
from typing import List, Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn

from ...activations import ACT2FN
from ...modeling_outputs import BaseModelOutput, DepthEstimatorOutput
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import (
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    logging,
    replace_return_docstrings,
)
from .configuration_glpn import GLPNConfig


logger = logging.get_logger(__name__)

_CONFIG_FOR_DOC = "GLPNConfig"
_CHECKPOINT_FOR_DOC = "vinvino02/glpn-kitti"
_EXPECTED_OUTPUT_SHAPE = [1, 512, 15, 20]

GLPN_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "vinvino02/glpn-kitti",
]


def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    """
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1 - drop_prob
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)  # work with tensors of arbitrary rank, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize
    output = input.div(keep_prob) * random_tensor
    return output


class GLPNDropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob: Optional[float] = None) -> None:
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return drop_path(hidden_states, self.drop_prob, self.training)

    def extra_repr(self) -> str:
        return "p={}".format(self.drop_prob)
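

# Illustrative usage sketch (comment only, not executed on import): during training, `drop_path`
# zeroes the whole residual branch of a sample with probability `drop_prob` and rescales the kept
# samples by `1 / (1 - drop_prob)`, so the expected value is preserved; in eval mode it is a no-op.
#
#   layer = GLPNDropPath(drop_prob=0.1)
#   layer.train()
#   out = layer(torch.randn(4, 16, 32))  # roughly 10% of the 4 samples are dropped, the rest scaled by 1/0.9
#   layer.eval()
#   out = layer(torch.randn(4, 16, 32))  # identity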


class GLPNOverlapPatchEmbeddings(nn.Module):
    """Construct the overlapping patch embeddings."""

    def __init__(self, patch_size, stride, num_channels, hidden_size):
        super().__init__()
        self.proj = nn.Conv2d(
            num_channels,
            hidden_size,
            kernel_size=patch_size,
            stride=stride,
            padding=patch_size // 2,
        )
        self.layer_norm = nn.LayerNorm(hidden_size)

    def forward(self, pixel_values):
        embeddings = self.proj(pixel_values)
        _, _, height, width = embeddings.shape
        # (batch_size, num_channels, height, width) -> (batch_size, height * width, num_channels)
        # so the embeddings can be fed to a Transformer layer
        embeddings = embeddings.flatten(2).transpose(1, 2)
        embeddings = self.layer_norm(embeddings)
        return embeddings, height, width
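

# Shape sketch (illustrative values, not taken from this module): with patch_size=7 and stride=4,
# a (1, 3, 480, 640) pixel tensor becomes a (1, 120 * 160, hidden_size) token sequence, and the
# returned (height, width) = (120, 160) lets later blocks undo the flattening.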


class GLPNEfficientSelfAttention(nn.Module):
    """SegFormer's efficient self-attention mechanism. Employs the sequence reduction process introduced in the [PvT
    paper](https://arxiv.org/abs/2102.12122)."""

    def __init__(self, config, hidden_size, num_attention_heads, sequence_reduction_ratio):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads

        if self.hidden_size % self.num_attention_heads != 0:
            raise ValueError(
                f"The hidden size ({self.hidden_size}) is not a multiple of the number of attention "
                f"heads ({self.num_attention_heads})"
            )

        self.attention_head_size = int(self.hidden_size / self.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Linear(self.hidden_size, self.all_head_size)
        self.key = nn.Linear(self.hidden_size, self.all_head_size)
        self.value = nn.Linear(self.hidden_size, self.all_head_size)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

        self.sr_ratio = sequence_reduction_ratio
        if sequence_reduction_ratio > 1:
            self.sr = nn.Conv2d(
                hidden_size, hidden_size, kernel_size=sequence_reduction_ratio, stride=sequence_reduction_ratio
            )
            self.layer_norm = nn.LayerNorm(hidden_size)

    def transpose_for_scores(self, hidden_states):
        new_shape = hidden_states.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        hidden_states = hidden_states.view(new_shape)
        return hidden_states.permute(0, 2, 1, 3)

    def forward(self, hidden_states, height, width, output_attentions=False):
        query_layer = self.transpose_for_scores(self.query(hidden_states))

        if self.sr_ratio > 1:
            batch_size, seq_len, num_channels = hidden_states.shape
            # reshape to (batch_size, num_channels, height, width)
            hidden_states = hidden_states.permute(0, 2, 1).reshape(batch_size, num_channels, height, width)
            # apply sequence reduction
            hidden_states = self.sr(hidden_states)
            # reshape back to (batch_size, seq_len, num_channels)
            hidden_states = hidden_states.reshape(batch_size, num_channels, -1).permute(0, 2, 1)
            hidden_states = self.layer_norm(hidden_states)

        key_layer = self.transpose_for_scores(self.key(hidden_states))
        value_layer = self.transpose_for_scores(self.value(hidden_states))

        # dot product between "query" and "key" gives the raw attention scores
        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
        attention_scores = attention_scores / math.sqrt(self.attention_head_size)

        # normalize the attention scores to probabilities
        attention_probs = nn.functional.softmax(attention_scores, dim=-1)
        attention_probs = self.dropout(attention_probs)

        context_layer = torch.matmul(attention_probs, value_layer)

        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(new_context_layer_shape)

        outputs = (context_layer, attention_probs) if output_attentions else (context_layer,)

        return outputs


class GLPNSelfOutput(nn.Module):
    def __init__(self, config, hidden_size):
        super().__init__()
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        return hidden_states


class GLPNAttention(nn.Module):
    def __init__(self, config, hidden_size, num_attention_heads, sequence_reduction_ratio):
        super().__init__()
        self.self = GLPNEfficientSelfAttention(
            config=config,
            hidden_size=hidden_size,
            num_attention_heads=num_attention_heads,
            sequence_reduction_ratio=sequence_reduction_ratio,
        )
        self.output = GLPNSelfOutput(config, hidden_size=hidden_size)
        self.pruned_heads = set()

    def prune_heads(self, heads):
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.self.num_attention_heads, self.self.attention_head_size, self.pruned_heads
        )

        # prune linear layers
        self.self.query = prune_linear_layer(self.self.query, index)
        self.self.key = prune_linear_layer(self.self.key, index)
        self.self.value = prune_linear_layer(self.self.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # update hyper params and store pruned heads
        self.self.num_attention_heads = self.self.num_attention_heads - len(heads)
        self.self.all_head_size = self.self.attention_head_size * self.self.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(self, hidden_states, height, width, output_attentions=False):
        self_outputs = self.self(hidden_states, height, width, output_attentions)

        attention_output = self.output(self_outputs[0], hidden_states)
        outputs = (attention_output,) + self_outputs[1:]  # add attentions if we output them
        return outputs
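

# Complexity sketch (illustrative only): with sequence_reduction_ratio=8, keys and values are
# computed on an 8x8 spatially reduced copy of the input, so attention over N = height * width
# query tokens costs on the order of N * N / 64 instead of N^2, while the queries (and therefore
# the output resolution) keep the full sequence length.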


class GLPNDWConv(nn.Module):
    def __init__(self, dim=768):
        super().__init__()
        self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim)

    def forward(self, hidden_states, height, width):
        batch_size, seq_len, num_channels = hidden_states.shape
        hidden_states = hidden_states.transpose(1, 2).view(batch_size, num_channels, height, width)
        hidden_states = self.dwconv(hidden_states)
        hidden_states = hidden_states.flatten(2).transpose(1, 2)

        return hidden_states


class GLPNMixFFN(nn.Module):
    def __init__(self, config, in_features, hidden_features=None, out_features=None):
        super().__init__()
        out_features = out_features or in_features
        self.dense1 = nn.Linear(in_features, hidden_features)
        self.dwconv = GLPNDWConv(hidden_features)
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act
        self.dense2 = nn.Linear(hidden_features, out_features)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, height, width):
        hidden_states = self.dense1(hidden_states)
        hidden_states = self.dwconv(hidden_states, height, width)
        hidden_states = self.intermediate_act_fn(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.dense2(hidden_states)
        hidden_states = self.dropout(hidden_states)
        return hidden_states
zGLPNMixFFN.forward)NNru   r#   r#   r+   r$   r     s   r   c                       s*   e Zd ZdZ fddZdddZ  ZS )	GLPNLayerzCThis corresponds to the Block class in the original implementation.c                    sn   t    t|| _t||||d| _|dkr8t|nt | _	t|| _
t|| }t|||d| _d S )N)rF   rQ   r`   r   )r   r   )r(   r)   r   rB   layer_norm_1rv   	attentionr&   Identityr%   layer_norm_2rS   r   mlp)r*   r_   rF   rQ   r%   r`   	mlp_ratioZmlp_hidden_sizer+   r#   r$   r)   &  s    
zGLPNLayer.__init__Fc           
      C   sr   | j | ||||d}|d }|dd  }| |}|| }| | |||}| |}|| }	|	f| }|S )N)rn   r   r   )r   r   r%   r   r   )
r*   r.   rL   rM   rn   Zself_attention_outputsr~   rq   Z
mlp_outputZlayer_outputr#   r#   r$   r/   4  s    


zGLPNLayer.forward)FrN   r#   r#   r+   r$   r   #  s   r   c                       s&   e Zd Z fddZdddZ  ZS )GLPNEncoderc           	         sT  t     | _dd td jt jD }g }t j	D ]D}|
t j|  j| |dkrf jn j|d   j| d q>t|| _g }d}t j	D ]}g }|dkr| j|d  7 }t j| D ]>}|
t  j|  j| |||   j|  j| d q|
t| qt|| _t fddt j	D | _d S )Nc                 S   s   g | ]}|  qS r#   )item).0xr#   r#   r$   
<listcomp>T  s     z(GLPNEncoder.__init__.<locals>.<listcomp>r   r   )rD   r>   rE   rF   )rF   rQ   r%   r`   r   c                    s   g | ]}t  j| qS r#   )r   rB   hidden_sizes)r   ir_   r#   r$   r   |  s     )r(   r)   r_   r    ZlinspaceZdrop_path_ratesumZdepthsrangeZnum_encoder_blocksappendr:   Zpatch_sizesstridesrE   r   r   
ModuleListpatch_embeddingsr   rQ   Z	sr_ratiosZ
mlp_ratiosblockrC   )	r*   r_   ZdprrJ   r   blockscurZlayersjr+   r   r$   r)   O  sH    
 

zGLPNEncoder.__init__FTc                 C   s   |rdnd }|rdnd }|j d }|}tt| j| j| jD ]\}	}
|
\}}}||\}}}t|D ]0\}}|||||}|d }|rd||d f }qd||}||||ddddd }|r<||f }q<|st	dd |||fD S t
|||d	S )
Nr#   r   r   ra   r   r;   c                 s   s   | ]}|d k	r|V  qd S r'   r#   )r   vr#   r#   r$   	<genexpr>  s      z&GLPNEncoder.forward.<locals>.<genexpr>Zlast_hidden_stater.   
attentions)r   	enumeratezipr   r   rC   ri   rd   rm   tupler	   )r*   rI   rn   output_hidden_statesreturn_dictZall_hidden_statesZall_self_attentionsro   r.   idxr   Zembedding_layerZblock_layerZ
norm_layerrL   rM   r   ZblkZlayer_outputsr#   r#   r$   r/     s.    

 zGLPNEncoder.forward)FFTru   r#   r#   r+   r$   r   N  s
   3   r   c                   @   s$   e Zd ZdZeZdZdZdd ZdS )GLPNPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config_class = GLPNConfig
    base_model_prefix = "glpn"
    main_input_name = "pixel_values"

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)


GLPN_START_DOCSTRING = r"""
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`GLPNConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
"""

GLPN_INPUTS_DOCSTRING = r"""

    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using
            [`AutoImageProcessor`]. See [`GLPNImageProcessor.__call__`] for details.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""


@add_start_docstrings(
    "The bare GLPN encoder (Mix-Transformer) outputting raw hidden-states without any specific head on top.",
    GLPN_START_DOCSTRING,
)
class GLPNModel(GLPNPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.config = config

        # hierarchical Transformer encoder
        self.encoder = GLPNEncoder(config)

        # initialize weights and apply final processing
        self.post_init()

    def _prune_heads(self, heads_to_prune):
        """
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr   layerr   r}   )r*   Zheads_to_pruner   r{   r#   r#   r$   _prune_heads  s    zGLPNModel._prune_headsz(batch_size, sequence_length)Zvision)
checkpointoutput_typer   ZmodalityZexpected_outputN)rI   rn   r   r   r   c                 C   s~   |d k	r|n| j j}|d k	r |n| j j}|d k	r4|n| j j}| j||||d}|d }|sl|f|dd   S t||j|jdS )Nrn   r   r   r   r   r   )r_   rn   r   use_return_dictr   r	   r.   r   )r*   rI   rn   r   r   Zencoder_outputsZsequence_outputr#   r#   r$   r/     s$    zGLPNModel.forward)NNN)r2   r3   r4   r)   r   r   GLPN_INPUTS_DOCSTRINGr0   r   _CHECKPOINT_FOR_DOCr	   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr    FloatTensorr   boolr   r   r/   r9   r#   r#   r+   r$   r     s(   
   
r   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )GLPNSelectiveFeatureFusionz
    Selective Feature Fusion module, as explained in the [paper](https://arxiv.org/abs/2201.07436) (section 3.4). This
    module adaptively selects and integrates local and global features by attaining an attention map for each feature.
    """

    def __init__(self, in_channel=64):
        super().__init__()

        self.convolutional_layer1 = nn.Sequential(
            nn.Conv2d(in_channels=int(in_channel * 2), out_channels=in_channel, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(in_channel),
            nn.ReLU(),
        )

        self.convolutional_layer2 = nn.Sequential(
            nn.Conv2d(in_channels=in_channel, out_channels=int(in_channel / 2), kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(int(in_channel / 2)),
            nn.ReLU(),
        )

        self.convolutional_layer3 = nn.Conv2d(
            in_channels=int(in_channel / 2), out_channels=2, kernel_size=3, stride=1, padding=1
        )

        self.sigmoid = nn.Sigmoid()

    def forward(self, local_features, global_features):
        # concatenate features along the channel dimension
        features = torch.cat((local_features, global_features), dim=1)
        # pass through convolutional layers
        features = self.convolutional_layer1(features)
        features = self.convolutional_layer2(features)
        features = self.convolutional_layer3(features)
        # apply sigmoid to get a two-channel attention map
        attn = self.sigmoid(features)
        # construct hybrid features by weighting and adding element-wise
        hybrid_features = local_features * attn[:, 0, :, :].unsqueeze(1) + global_features * attn[
            :, 1, :, :
        ].unsqueeze(1)

        return hybrid_features


class GLPNDecoderStage(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        should_skip = in_channels == out_channels
        self.convolution = nn.Conv2d(in_channels, out_channels, kernel_size=1) if not should_skip else nn.Identity()
        self.fusion = GLPNSelectiveFeatureFusion(out_channels)
        self.upsample = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)

    def forward(self, hidden_state, residual=None):
        hidden_state = self.convolution(hidden_state)
        if residual is not None:
            hidden_state = self.fusion(hidden_state, residual)
        hidden_state = self.upsample(hidden_state)

        return hidden_state


class GLPNDecoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        # we use features from end -> start
        reserved_hidden_sizes = config.hidden_sizes[::-1]
        out_channels = config.decoder_hidden_size

        self.stages = nn.ModuleList(
            [GLPNDecoderStage(hidden_size, out_channels) for hidden_size in reserved_hidden_sizes]
        )
        # don't fuse in first stage
        self.stages[0].fusion = None

        self.final_upsample = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)

    def forward(self, hidden_states: List[torch.Tensor]) -> List[torch.Tensor]:
        stage_hidden_states = []
        stage_hidden_state = None
        for hidden_state, stage in zip(hidden_states[::-1], self.stages):
            stage_hidden_state = stage(hidden_state, stage_hidden_state)
            stage_hidden_states.append(stage_hidden_state)

        stage_hidden_states[-1] = self.final_upsample(stage_hidden_state)

        return stage_hidden_states


class SiLogLoss(nn.Module):
    r"""
    Implements the Scale-invariant log scale loss [Eigen et al., 2014](https://arxiv.org/abs/1406.2283).

    $$L=\frac{1}{n} \sum_{i} d_{i}^{2}-\frac{1}{2 n^{2}}\left(\sum_{i} d_{i}^{2}\right)$$ where $d_{i}=\log y_{i}-\log
    y_{i}^{*}$.

          ?c                    s   t    || _d S r'   )r(   r)   lambd)r*   r   r+   r#   r$   r)     s    
zSiLogLoss.__init__c                 C   sX   |dk  }t|| t||  }tt|d | jt| d  }|S )Nr   r;   )detachr    logrl   powr   r   )r*   predtargetZ
valid_maskZdiff_loglossr#   r#   r$   r/     s    ,zSiLogLoss.forward)r   rN   r#   r#   r+   r$   r   z  s   r   c                       s4   e Zd Z fddZeej ejdddZ  ZS )GLPNDepthEstimationHeadc                    sR   t    || _|j}ttj||ddddtjddtj|ddddd| _d S )Nr   r   r<   F)Zinplace)	r(   r)   r_   r   r   r   r@   r   head)r*   r_   Zchannelsr+   r#   r$   r)     s    

z GLPNDepthEstimationHead.__init__r-   c                 C   s8   || j j }| |}t|| j j }|jdd}|S )Nr   rg   )r_   Zhead_in_indexr   r    r   	max_depthZsqueeze)r*   r.   predicted_depthr#   r#   r$   r/     s
    
zGLPNDepthEstimationHead.forwardr   r#   r#   r+   r$   r     s   r   z]GLPN Model transformer with a lightweight depth estimation head on top e.g. for KITTI, NYUv2.c                       sr   e Zd Z fddZeedeee	dd	e
jee
j ee ee ee eee
j ef dddZ  ZS )
GLPNForDepthEstimationc                    s6   t  | t|| _t|| _t|| _|   d S r'   )	r(   r)   r   r   r   decoderr   r   r   r   r+   r#   r$   r)     s
    


zGLPNForDepthEstimation.__init__zbatch_size, sequence_length)r   r   N)rI   labelsrn   r   r   r   c                 C   s   |dk	r|n| j j}|dk	r |n| j j}| j||d|d}|rD|jn|d }| |}| |}	d}
|dk	r|t }||	|}
|s|r|	f|dd  }n|	f|dd  }|
dk	r|
f| S |S t|
|	|r|jnd|j	dS )a  
        labels (`torch.FloatTensor` of shape `(batch_size, height, width)`, *optional*):
            Ground truth depth estimation maps for computing the loss.

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, GLPNForDepthEstimation
        >>> import torch
        >>> import numpy as np
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> image_processor = AutoImageProcessor.from_pretrained("vinvino02/glpn-kitti")
        >>> model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-kitti")

        >>> # prepare image for the model
        >>> inputs = image_processor(images=image, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**inputs)
        ...     predicted_depth = outputs.predicted_depth

        >>> # interpolate to original size
        >>> prediction = torch.nn.functional.interpolate(
        ...     predicted_depth.unsqueeze(1),
        ...     size=image.size[::-1],
        ...     mode="bicubic",
        ...     align_corners=False,
        ... )

        >>> # visualize the prediction
        >>> output = prediction.squeeze().cpu().numpy()
        >>> formatted = (output * 255 / np.max(output)).astype("uint8")
        >>> depth = Image.fromarray(formatted)
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        outputs = self.glpn(
            pixel_values,
            output_attentions=output_attentions,
            output_hidden_states=True,  # we need the intermediate hidden states
            return_dict=return_dict,
        )

        hidden_states = outputs.hidden_states if return_dict else outputs[1]

        out = self.decoder(hidden_states)
        predicted_depth = self.head(out)

        loss = None
        if labels is not None:
            loss_fct = SiLogLoss()
            loss = loss_fct(predicted_depth, labels)

        if not return_dict:
            if output_hidden_states:
                output = (predicted_depth,) + outputs[1:]
            else:
                output = (predicted_depth,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return DepthEstimatorOutput(
            loss=loss,
            predicted_depth=predicted_depth,
            hidden_states=outputs.hidden_states if output_hidden_states else None,
            attentions=outputs.attentions,
        )