# coding=utf-8
""" PyTorch PVT model."""

import collections
import math
from typing import Iterable, Optional, Tuple, Union

import torch
import torch.nn.functional as F
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...activations import ACT2FN
from ...modeling_outputs import BaseModelOutput, ImageClassifierOutput
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import (
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    logging,
)
from .configuration_pvt import PvtConfig


logger = logging.get_logger(__name__)

_CONFIG_FOR_DOC = "PvtConfig"

_CHECKPOINT_FOR_DOC = "Zetatech/pvt-tiny-224"
_EXPECTED_OUTPUT_SHAPE = [1, 50, 512]

_IMAGE_CLASS_CHECKPOINT = "Zetatech/pvt-tiny-224"
_IMAGE_CLASS_EXPECTED_OUTPUT = "tabby, tabby cat"

PVT_PRETRAINED_MODEL_ARCHIVE_LIST = ["Zetatech/pvt-tiny-224"]


def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    """
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1 - drop_prob
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)  # broadcast over all non-batch dimensions
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize to a 0/1 keep mask
    output = input.div(keep_prob) * random_tensor
    return output


class PvtDropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob: Optional[float] = None) -> None:
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return drop_path(hidden_states, self.drop_prob, self.training)

    def extra_repr(self) -> str:
        return "p={}".format(self.drop_prob)


class PvtPatchEmbeddings(nn.Module):
    """
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
    Transformer.
    """

    def __init__(
        self,
        config: PvtConfig,
        image_size: Union[int, Iterable[int]],
        patch_size: Union[int, Iterable[int]],
        stride: int,
        num_channels: int,
        hidden_size: int,
        cls_token: bool = False,
    ):
        super().__init__()
        self.config = config
        image_size = image_size if isinstance(image_size, collections.abc.Iterable) else (image_size, image_size)
        patch_size = patch_size if isinstance(patch_size, collections.abc.Iterable) else (patch_size, patch_size)
        num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])
        self.image_size = image_size
        self.patch_size = patch_size
        self.num_channels = num_channels
        self.num_patches = num_patches

        self.position_embeddings = nn.Parameter(
            torch.randn(1, num_patches + 1 if cls_token else num_patches, hidden_size)
        )
        self.cls_token = nn.Parameter(torch.zeros(1, 1, hidden_size)) if cls_token else None
        self.projection = nn.Conv2d(num_channels, hidden_size, kernel_size=patch_size, stride=stride)
        self.layer_norm = nn.LayerNorm(hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(p=config.hidden_dropout_prob)

    def interpolate_pos_encoding(self, embeddings: torch.Tensor, height: int, width: int) -> torch.Tensor:
        num_patches = height * width
        if num_patches == self.config.image_size * self.config.image_size:
            return self.position_embeddings
        embeddings = embeddings.reshape(1, height, width, -1).permute(0, 3, 1, 2)
        interpolated_embeddings = F.interpolate(embeddings, size=(height, width), mode="bilinear")
        interpolated_embeddings = interpolated_embeddings.reshape(1, -1, height * width).permute(0, 2, 1)
        return interpolated_embeddings

    def forward(self, pixel_values: torch.Tensor) -> Tuple[torch.Tensor, int, int]:
        batch_size, num_channels, height, width = pixel_values.shape
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
            )
        patch_embed = self.projection(pixel_values)
        *_, height, width = patch_embed.shape
        patch_embed = patch_embed.flatten(2).transpose(1, 2)
        embeddings = self.layer_norm(patch_embed)
        if self.cls_token is not None:
            cls_token = self.cls_token.expand(batch_size, -1, -1)
            embeddings = torch.cat((cls_token, embeddings), dim=1)
            position_embeddings = self.interpolate_pos_encoding(self.position_embeddings[:, 1:], height, width)
            position_embeddings = torch.cat((self.position_embeddings[:, :1], position_embeddings), dim=1)
        else:
            position_embeddings = self.interpolate_pos_encoding(self.position_embeddings, height, width)
        embeddings = self.dropout(embeddings + position_embeddings)

        return embeddings, height, width


class PvtSelfOutput(nn.Module):
    def __init__(self, config: PvtConfig, hidden_size: int):
        super().__init__()
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        return hidden_states
class PvtEfficientSelfAttention(nn.Module):
    """Efficient self-attention mechanism with reduction of the sequence [PvT paper](https://arxiv.org/abs/2102.12122)."""

    def __init__(
        self, config: PvtConfig, hidden_size: int, num_attention_heads: int, sequences_reduction_ratio: float
    ):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads

        if self.hidden_size % self.num_attention_heads != 0:
            raise ValueError(
                f"The hidden size ({self.hidden_size}) is not a multiple of the number of attention heads"
                f" ({self.num_attention_heads})"
            )

        self.attention_head_size = int(self.hidden_size / self.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Linear(self.hidden_size, self.all_head_size, bias=config.qkv_bias)
        self.key = nn.Linear(self.hidden_size, self.all_head_size, bias=config.qkv_bias)
        self.value = nn.Linear(self.hidden_size, self.all_head_size, bias=config.qkv_bias)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

        self.sequences_reduction_ratio = sequences_reduction_ratio
        if sequences_reduction_ratio > 1:
            self.sequence_reduction = nn.Conv2d(
                hidden_size, hidden_size, kernel_size=sequences_reduction_ratio, stride=sequences_reduction_ratio
            )
            self.layer_norm = nn.LayerNorm(hidden_size, eps=config.layer_norm_eps)

    def transpose_for_scores(self, hidden_states: torch.Tensor) -> torch.Tensor:
        new_shape = hidden_states.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        hidden_states = hidden_states.view(new_shape)
        return hidden_states.permute(0, 2, 1, 3)

    def forward(
        self, hidden_states: torch.Tensor, height: int, width: int, output_attentions: bool = False
    ) -> Tuple[torch.Tensor]:
        query_layer = self.transpose_for_scores(self.query(hidden_states))

        if self.sequences_reduction_ratio > 1:
            batch_size, seq_len, num_channels = hidden_states.shape
            # reshape the sequence back to a (batch_size, num_channels, height, width) feature map
            hidden_states = hidden_states.permute(0, 2, 1).reshape(batch_size, num_channels, height, width)
            # apply the strided convolution that shrinks the spatial resolution of keys/values
            hidden_states = self.sequence_reduction(hidden_states)
            # flatten back to (batch_size, reduced_seq_len, num_channels)
            hidden_states = hidden_states.reshape(batch_size, num_channels, -1).permute(0, 2, 1)
            hidden_states = self.layer_norm(hidden_states)

        key_layer = self.transpose_for_scores(self.key(hidden_states))
        value_layer = self.transpose_for_scores(self.value(hidden_states))

        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
        attention_scores = attention_scores / math.sqrt(self.attention_head_size)

        # normalize the attention scores to probabilities
        attention_probs = nn.functional.softmax(attention_scores, dim=-1)
        # this drops entire tokens to attend to, as in the original Transformer paper
        attention_probs = self.dropout(attention_probs)

        context_layer = torch.matmul(attention_probs, value_layer)
        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(new_context_layer_shape)

        outputs = (context_layer, attention_probs) if output_attentions else (context_layer,)

        return outputs


class PvtAttention(nn.Module):
    def __init__(
        self, config: PvtConfig, hidden_size: int, num_attention_heads: int, sequences_reduction_ratio: float
    ):
        super().__init__()
        self.self = PvtEfficientSelfAttention(
            config,
            hidden_size=hidden_size,
            num_attention_heads=num_attention_heads,
            sequences_reduction_ratio=sequences_reduction_ratio,
        )
        self.output = PvtSelfOutput(config, hidden_size=hidden_size)
        self.pruned_heads = set()

    def prune_heads(self, heads):
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.self.num_attention_heads, self.self.attention_head_size, self.pruned_heads
        )

        # prune linear layers
        self.self.query = prune_linear_layer(self.self.query, index)
        self.self.key = prune_linear_layer(self.self.key, index)
        self.self.value = prune_linear_layer(self.self.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # update hyper params and store pruned heads
        self.self.num_attention_heads = self.self.num_attention_heads - len(heads)
        self.self.all_head_size = self.self.attention_head_size * self.self.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self, hidden_states: torch.Tensor, height: int, width: int, output_attentions: bool = False
    ) -> Tuple[torch.Tensor]:
        self_outputs = self.self(hidden_states, height, width, output_attentions)

        attention_output = self.output(self_outputs[0])
        outputs = (attention_output,) + self_outputs[1:]  # add attentions if we output them
        return outputs
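# Spatial-reduction attention (SRA) in numbers (illustrative; assumes pvt-tiny
# stage 1 with 3136 tokens and sequence reduction ratio 8): the key/value sequence
# is reshaped to a (batch, 64, 56, 56) map, a stride-8 Conv2d shrinks it to
# 7 x 7 = 49 tokens, so each attention matrix is 3136 x 49 instead of 3136 x 3136 -
# roughly a 64x saving in attention memory and compute at the highest resolution.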
class PvtFFN(nn.Module):
    def __init__(
        self,
        config: PvtConfig,
        in_features: int,
        hidden_features: Optional[int] = None,
        out_features: Optional[int] = None,
    ):
        super().__init__()
        out_features = out_features if out_features is not None else in_features
        self.dense1 = nn.Linear(in_features, hidden_features)
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act
        self.dense2 = nn.Linear(hidden_features, out_features)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense1(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.dense2(hidden_states)
        hidden_states = self.dropout(hidden_states)
        return hidden_states
  ZS )	PvtLayerr>   rC   rr   r&   rs   	mlp_ratioc                    sz   t    tj||jd| _t||||d| _|dkr>t|nt	 | _
tj||jd| _t|| }t|||d| _d S )NrF   rq   r   )r>   r   r   )r)   r*   r   rP   rQ   layer_norm_1r   	attentionr'   Identityr&   layer_norm_2rk   r   mlp)r+   r>   rC   rr   r&   rs   r   Zmlp_hidden_sizer,   r$   r%   r*   D  s    	
zPvtLayer.__init__Fr/   rW   rX   r   c           
      C   sn   | j | ||||d}|d }|dd  }| |}|| }| | |}| |}|| }	|	f| }|S )Nr   r   r   )r   r   r&   r   r   )
r+   r/   rW   rX   r   Zself_attention_outputsr   r   Z
mlp_outputZlayer_outputr$   r$   r%   r1   Z  s    


zPvtLayer.forward)F)r4   r5   r6   r   rk   r8   r*   r!   r9   rl   r1   r;   r$   r$   r,   r%   r   C  s   r   c                       sP   e Zd Zed fddZd	ejee ee ee e	e
ef dddZ  ZS )

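# Stochastic depth schedule (illustrative): PvtEncoder below draws one drop-path
# rate per layer from torch.linspace(0, config.drop_path_rate, sum(config.depths)).
# With depths [2, 2, 2, 2] and drop_path_rate 0.1 that yields eight rates
# 0.0, 0.014, 0.029, ..., 0.1, so earlier layers are dropped less often than later ones.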
class PvtEncoder(nn.Module):
    def __init__(self, config: PvtConfig):
        super().__init__()
        self.config = config

        # stochastic depth decay rule: one drop-path rate per layer
        drop_path_decays = torch.linspace(0, config.drop_path_rate, sum(config.depths)).tolist()

        # patch embeddings, one per stage; only the last stage gets a cls token
        embeddings = []
        for i in range(config.num_encoder_blocks):
            embeddings.append(
                PvtPatchEmbeddings(
                    config=config,
                    image_size=config.image_size if i == 0 else self.config.image_size // (2 ** (i + 1)),
                    patch_size=config.patch_sizes[i],
                    stride=config.strides[i],
                    num_channels=config.num_channels if i == 0 else config.hidden_sizes[i - 1],
                    hidden_size=config.hidden_sizes[i],
                    cls_token=i == config.num_encoder_blocks - 1,
                )
            )
        self.patch_embeddings = nn.ModuleList(embeddings)

        # Transformer blocks
        blocks = []
        cur = 0
        for i in range(config.num_encoder_blocks):
            # each block consists of layers
            layers = []
            if i != 0:
                cur += config.depths[i - 1]
            for j in range(config.depths[i]):
                layers.append(
                    PvtLayer(
                        config=config,
                        hidden_size=config.hidden_sizes[i],
                        num_attention_heads=config.num_attention_heads[i],
                        drop_path=drop_path_decays[cur + j],
                        sequences_reduction_ratio=config.sequence_reduction_ratios[i],
                        mlp_ratio=config.mlp_ratios[i],
                    )
                )
            blocks.append(nn.ModuleList(layers))
        self.block = nn.ModuleList(blocks)

        self.layer_norm = nn.LayerNorm(config.hidden_sizes[-1], eps=config.layer_norm_eps)

    def forward(
        self,
        pixel_values: torch.FloatTensor,
        output_attentions: Optional[bool] = False,
        output_hidden_states: Optional[bool] = False,
        return_dict: Optional[bool] = True,
    ) -> Union[Tuple, BaseModelOutput]:
        all_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None

        batch_size = pixel_values.shape[0]
        num_blocks = len(self.block)
        hidden_states = pixel_values
        for idx, (embedding_layer, block_layer) in enumerate(zip(self.patch_embeddings, self.block)):
            # first, obtain patch embeddings for this stage
            hidden_states, height, width = embedding_layer(hidden_states)
            # second, send embeddings through the stage's Transformer layers
            for block in block_layer:
                layer_outputs = block(hidden_states, height, width, output_attentions)
                hidden_states = layer_outputs[0]
                if output_attentions:
                    all_self_attentions = all_self_attentions + (layer_outputs[1],)
                if output_hidden_states:
                    all_hidden_states = all_hidden_states + (hidden_states,)
            if idx != num_blocks - 1:
                # fold the sequence back into a feature map for the next stage
                hidden_states = hidden_states.reshape(batch_size, height, width, -1).permute(0, 3, 1, 2).contiguous()
        hidden_states = self.layer_norm(hidden_states)
        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)
        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states, all_self_attentions] if v is not None)
        return BaseModelOutput(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
        )


class PvtPreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config_class = PvtConfig
    base_model_prefix = "pvt"
    main_input_name = "pixel_values"

    def _init_weights(self, module: Union[nn.Linear, nn.Conv2d, nn.LayerNorm]) -> None:
        """Initialize the weights"""
        if isinstance(module, nn.Linear):
            module.weight.data = nn.init.trunc_normal_(
                module.weight.data, mean=0.0, std=self.config.initializer_range
            )
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        elif isinstance(module, PvtPatchEmbeddings):
            module.position_embeddings.data = nn.init.trunc_normal_(
                module.position_embeddings.data, mean=0.0, std=self.config.initializer_range
            )
            if module.cls_token is not None:
                module.cls_token.data = nn.init.trunc_normal_(
                    module.cls_token.data, mean=0.0, std=self.config.initializer_range
                )

    def _set_gradient_checkpointing(self, module: PvtEncoder, value: bool = False):
        if isinstance(module, PvtEncoder):
            module.gradient_checkpointing = value


PVT_START_DOCSTRING = r"""
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`~PvtConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a
  
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See [`PvtImageProcessor.__call__`]
            for details.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zSThe bare Pvt encoder outputting raw hidden-states without any specific head on top.c                
       sx   e Zd Zed fddZdd Zeede	e
eededdejee ee ee eeef d
ddZ  ZS )PvtModelr   c                    s(   t  | || _t|| _|   d S r(   )r)   r*   r>   r   encoder	post_initr+   r>   r,   r$   r%   r*     s    
zPvtModel.__init__c                 C   s*   |  D ]\}}| jj| j| qdS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr   layerr   r   )r+   Zheads_to_pruner   r   r$   r$   r%   _prune_heads  s    zPvtModel._prune_heads%(batch_size, channels, height, width)Zvision)
checkpointoutput_typer   Zmodalityexpected_outputNr   c                 C   s~   |d k	r|n| j j}|d k	r |n| j j}|d k	r4|n| j j}| j||||d}|d }|sl|f|dd   S t||j|jdS )Nra   r   r   r   r   r   r   )r>   r   r   use_return_dictr   r   r/   r   )r+   ra   r   r   r   Zencoder_outputssequence_outputr$   r$   r%   r1   #  s$    zPvtModel.forward)NNN)r4   r5   r6   r   r*   r   r   PVT_INPUTS_DOCSTRINGr2   r   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr!   r   r   rl   r   r   r1   r;   r$   r$   r,   r%   r     s(   

   
r   z
    Pvt Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    """,
    PVT_START_DOCSTRING,
)
class PvtForImageClassification(PvtPreTrainedModel):
    def __init__(self, config: PvtConfig) -> None:
        super().__init__(config)

        self.num_labels = config.num_labels
        self.pvt = PvtModel(config)

        # Classifier head on top of the [CLS] token of the final hidden state
        self.classifier = (
            nn.Linear(config.hidden_sizes[-1], config.num_labels) if config.num_labels > 0 else nn.Identity()
        )

        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(PVT_INPUTS_DOCSTRING.format("(batch_size, channels, height, width)"))
    @add_code_sample_docstrings(
        checkpoint=_IMAGE_CLASS_CHECKPOINT,
        output_type=ImageClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
        expected_output=_IMAGE_CLASS_EXPECTED_OUTPUT,
    )
    def forward(
        self,
        pixel_values: Optional[torch.Tensor],
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, ImageClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.pvt(
            pixel_values=pixel_values,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = outputs[0]

        # classify from the [CLS] token (first position of the final stage)
        logits = self.classifier(sequence_output[:, 0, :])

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return ImageClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
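# Minimal usage sketch (illustrative, not part of the original module). It assumes
# the public "Zetatech/pvt-tiny-224" checkpoint referenced in the docstrings above
# and a PIL `image` supplied by the caller:
#
#     from transformers import AutoImageProcessor, PvtForImageClassification
#     import torch
#
#     processor = AutoImageProcessor.from_pretrained("Zetatech/pvt-tiny-224")
#     model = PvtForImageClassification.from_pretrained("Zetatech/pvt-tiny-224")
#     inputs = processor(images=image, return_tensors="pt")
#     with torch.no_grad():
#         logits = model(**inputs).logits
#     print(model.config.id2label[logits.argmax(-1).item()])  # e.g. "tabby, tabby cat"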