# coding=utf-8
""" PyTorch ConvNext model."""

from typing import Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...activations import ACT2FN
from ...modeling_outputs import (
    BackboneOutput,
    BaseModelOutputWithNoAttention,
    BaseModelOutputWithPoolingAndNoAttention,
    ImageClassifierOutputWithNoAttention,
)
from ...modeling_utils import PreTrainedModel
from ...utils import (
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    logging,
    replace_return_docstrings,
)
from ...utils.backbone_utils import BackboneMixin
from .configuration_convnext import ConvNextConfig


logger = logging.get_logger(__name__)

# General docstring
_CONFIG_FOR_DOC = "ConvNextConfig"

# Base docstring
_CHECKPOINT_FOR_DOC = "facebook/convnext-tiny-224"
_EXPECTED_OUTPUT_SHAPE = [1, 768, 7, 7]

# Image classification docstring
_IMAGE_CLASS_CHECKPOINT = "facebook/convnext-tiny-224"
_IMAGE_CLASS_EXPECTED_OUTPUT = "tabby, tabby cat"

CONVNEXT_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "facebook/convnext-tiny-224",
]


def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    """
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1 - drop_prob
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize
    output = input.div(keep_prob) * random_tensor
    return output


class ConvNextDropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob: Optional[float] = None) -> None:
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return drop_path(hidden_states, self.drop_prob, self.training)

    def extra_repr(self) -> str:
        return "p={}".format(self.drop_prob)
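

# A minimal sketch of stochastic depth in action (editorial illustration, not part of
# the original module): during training, each sample's residual branch is zeroed with
# probability `drop_prob` and survivors are rescaled by 1 / keep_prob so the expected
# activation is unchanged; in eval mode the function is the identity.
#
#     x = torch.ones(8, 3, 4, 4)
#     out = drop_path(x, drop_prob=0.5, training=True)   # ~half the samples zeroed, the rest scaled to 2.0
#     assert torch.equal(drop_path(x, drop_prob=0.5, training=False), x)  # identity at inference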


class ConvNextLayerNorm(nn.Module):
    r"""LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch_size, height,
    width, channels) while channels_first corresponds to inputs with shape (batch_size, channels, height, width).
    """

    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError(f"Unsupported data format: {self.data_format}")
        self.normalized_shape = (normalized_shape,)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.data_format == "channels_last":
            x = torch.nn.functional.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":
            input_dtype = x.dtype
            x = x.float()
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = x.to(dtype=input_dtype)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
        return x


class ConvNextEmbeddings(nn.Module):
    """This class is comparable to (and inspired by) the SwinEmbeddings class
    found in src/transformers/models/swin/modeling_swin.py.
    """

    def __init__(self, config):
        super().__init__()
        self.patch_embeddings = nn.Conv2d(
            config.num_channels, config.hidden_sizes[0], kernel_size=config.patch_size, stride=config.patch_size
        )
        self.layernorm = ConvNextLayerNorm(config.hidden_sizes[0], eps=1e-6, data_format="channels_first")
        self.num_channels = config.num_channels

    def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
        num_channels = pixel_values.shape[1]
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
            )
        embeddings = self.patch_embeddings(pixel_values)
        embeddings = self.layernorm(embeddings)
        return embeddings


class ConvNextLayer(nn.Module):
    """This corresponds to the `Block` class in the original implementation.

    There are two equivalent implementations: (1) [DwConv, LayerNorm (channels_first), 1x1 Conv, GELU, 1x1 Conv]; all
    in (N, C, H, W); (2) [DwConv, Permute to (N, H, W, C), LayerNorm (channels_last), Linear, GELU, Linear]; Permute
    back to (N, C, H, W).

    The authors used (2) as they find it slightly faster in PyTorch.

    Args:
        config ([`ConvNextConfig`]): Model configuration class.
        dim (`int`): Number of input channels.
        drop_path (`float`): Stochastic depth rate. Default: 0.0.
    """

    def __init__(self, config, dim, drop_path=0.0):
        super().__init__()
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)  # depthwise conv
        self.layernorm = ConvNextLayerNorm(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(dim, 4 * dim)  # pointwise/1x1 convs, implemented with linear layers
        self.act = ACT2FN[config.hidden_act]
        self.pwconv2 = nn.Linear(4 * dim, dim)
        self.layer_scale_parameter = (
            nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True)
            if config.layer_scale_init_value > 0
            else None
        )
        self.drop_path = ConvNextDropPath(drop_path) if drop_path > 0.0 else nn.Identity()

    def forward(self, hidden_states: torch.FloatTensor) -> torch.Tensor:
        input = hidden_states
        x = self.dwconv(hidden_states)
        x = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
        x = self.layernorm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.layer_scale_parameter is not None:
            x = self.layer_scale_parameter * x
        x = x.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)

        x = input + self.drop_path(x)
        return x
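
# Block sketch (editorial illustration; `config` stands for any ConvNextConfig): the
# layer is a shape-preserving inverted bottleneck. With dim=96: 7x7 depthwise conv
# (96 -> 96), LayerNorm in NHWC, pointwise expansion 96 -> 384, GELU, pointwise
# projection 384 -> 96, optional layer scale and stochastic depth, then the residual add.
#
#     layer = ConvNextLayer(config, dim=96)
#     out = layer(torch.randn(1, 96, 56, 56))  # out.shape == (1, 96, 56, 56)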
zConvNextLayer.forward)r   r`   r%   r%   r-   r&   rb      s   rb   c                       s6   e Zd ZdZd	 fdd	ZejejdddZ  Z	S )
ConvNextStagea  ConvNeXT stage, consisting of an optional downsampling layer + multiple residual blocks.

    Args:
        config ([`ConvNextConfig`]): Model configuration class.
        in_channels (`int`): Number of input channels.
        out_channels (`int`): Number of output channels.
        depth (`int`): Number of residual blocks.
        drop_path_rates (`List[float]`): Stochastic depth rates for each layer.
    """

    def __init__(self, config, in_channels, out_channels, kernel_size=2, stride=2, depth=2, drop_path_rates=None):
        super().__init__()

        if in_channels != out_channels or stride > 1:
            self.downsampling_layer = nn.Sequential(
                ConvNextLayerNorm(in_channels, eps=1e-6, data_format="channels_first"),
                nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride),
            )
        else:
            self.downsampling_layer = nn.Identity()
        drop_path_rates = drop_path_rates or [0.0] * depth
        self.layers = nn.Sequential(
            *[ConvNextLayer(config, dim=out_channels, drop_path=drop_path_rates[j]) for j in range(depth)]
        )

    def forward(self, hidden_states: torch.FloatTensor) -> torch.Tensor:
        hidden_states = self.downsampling_layer(hidden_states)
        hidden_states = self.layers(hidden_states)
        return hidden_states


class ConvNextEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.stages = nn.ModuleList()
        drop_path_rates = [
            x.tolist() for x in torch.linspace(0, config.drop_path_rate, sum(config.depths)).split(config.depths)
        ]
        prev_chs = config.hidden_sizes[0]
        for i in range(config.num_stages):
            out_chs = config.hidden_sizes[i]
            stage = ConvNextStage(
                config,
                in_channels=prev_chs,
                out_channels=out_chs,
                stride=2 if i > 0 else 1,
                depth=config.depths[i],
                drop_path_rates=drop_path_rates[i],
            )
            self.stages.append(stage)
            prev_chs = out_chs

    def forward(
        self,
        hidden_states: torch.FloatTensor,
        output_hidden_states: Optional[bool] = False,
        return_dict: Optional[bool] = True,
    ) -> Union[Tuple, BaseModelOutputWithNoAttention]:
        all_hidden_states = () if output_hidden_states else None

        for i, layer_module in enumerate(self.stages):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            hidden_states = layer_module(hidden_states)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states] if v is not None)

        return BaseModelOutputWithNoAttention(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
        )


class ConvNextPreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config_class = ConvNextConfig
    base_model_prefix = "convnext"
    main_input_name = "pixel_values"
    supports_gradient_checkpointing = True

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def _set_gradient_checkpointing(self, module, value=False):
        if isinstance(module, ConvNextEncoder):
            module.gradient_checkpointing = value


CONVNEXT_START_DOCSTRING = r"""
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. Use it
    as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`ConvNextConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
"""

CONVNEXT_INPUTS_DOCSTRING = r"""
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`ConvNextImageProcessor.__call__`] for details.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""


@add_start_docstrings(
    "The bare ConvNext model outputting raw features without any specific head on top.",
    CONVNEXT_START_DOCSTRING,
)
class ConvNextModel(ConvNextPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.config = config

        self.embeddings = ConvNextEmbeddings(config)
        self.encoder = ConvNextEncoder(config)

        # final layernorm layer
        self.layernorm = nn.LayerNorm(config.hidden_sizes[-1], eps=config.layer_norm_eps)

        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(CONVNEXT_INPUTS_DOCSTRING)
    @add_code_sample_docstrings(
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=BaseModelOutputWithPoolingAndNoAttention,
        config_class=_CONFIG_FOR_DOC,
        modality="vision",
        expected_output=_EXPECTED_OUTPUT_SHAPE,
    )
    def forward(
        self,
        pixel_values: torch.FloatTensor = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPoolingAndNoAttention]:
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if pixel_values is None:
            raise ValueError("You have to specify pixel_values")

        embedding_output = self.embeddings(pixel_values)

        encoder_outputs = self.encoder(
            embedding_output,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        last_hidden_state = encoder_outputs[0]

        # global average pooling, (N, C, H, W) -> (N, C)
        pooled_output = self.layernorm(last_hidden_state.mean([-2, -1]))

        if not return_dict:
            return (last_hidden_state, pooled_output) + encoder_outputs[1:]

        return BaseModelOutputWithPoolingAndNoAttention(
            last_hidden_state=last_hidden_state,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
        )


@add_start_docstrings(
    """
    ConvNext Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    """,
    CONVNEXT_START_DOCSTRING,
)
class ConvNextForImageClassification(ConvNextPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.num_labels = config.num_labels
        self.convnext = ConvNextModel(config)

        # Classifier head
        self.classifier = (
            nn.Linear(config.hidden_sizes[-1], config.num_labels) if config.num_labels > 0 else nn.Identity()
        )

        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(CONVNEXT_INPUTS_DOCSTRING)
    @add_code_sample_docstrings(
        checkpoint=_IMAGE_CLASS_CHECKPOINT,
        output_type=ImageClassifierOutputWithNoAttention,
        config_class=_CONFIG_FOR_DOC,
        expected_output=_IMAGE_CLASS_EXPECTED_OUTPUT,
    )
    def forward(
        self,
        pixel_values: torch.FloatTensor = None,
        labels: Optional[torch.LongTensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, ImageClassifierOutputWithNoAttention]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.convnext(pixel_values, output_hidden_states=output_hidden_states, return_dict=return_dict)

        pooled_output = outputs.pooler_output if return_dict else outputs[1]

        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return ImageClassifierOutputWithNoAttention(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
        )
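

# Usage sketch (editorial illustration; `inputs` as produced by an image processor is
# an assumption): passing `labels` turns the head into a loss computation, with the
# problem type inferred from `num_labels` and the label dtype as implemented above.
# ImageNet-1k index 281 is "tabby, tabby cat".
#
#     model = ConvNextForImageClassification.from_pretrained("facebook/convnext-tiny-224")
#     outputs = model(pixel_values=inputs["pixel_values"], labels=torch.tensor([281]))
#     outputs.loss.backward()  # CrossEntropyLoss, since num_labels > 1 and labels are integral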


@add_start_docstrings(
    """
    ConvNeXt backbone, to be used with frameworks like DETR and MaskFormer.
    """,
    CONVNEXT_START_DOCSTRING,
)
class ConvNextBackbone(ConvNextPreTrainedModel, BackboneMixin):
    def __init__(self, config):
        super().__init__(config)
        super()._init_backbone(config)

        self.embeddings = ConvNextEmbeddings(config)
        self.encoder = ConvNextEncoder(config)
        self.num_features = [config.hidden_sizes[0]] + config.hidden_sizes

        # Add layer norms to hidden states of out_features
        hidden_states_norms = {}
        for stage, num_channels in zip(self._out_features, self.channels):
            hidden_states_norms[stage] = ConvNextLayerNorm(num_channels, data_format="channels_first")
        self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)

        # initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(CONVNEXT_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=BackboneOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        pixel_values: torch.Tensor,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> BackboneOutput:
        """
        Returns:

        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("facebook/convnext-tiny-224")
        >>> model = AutoBackbone.from_pretrained("facebook/convnext-tiny-224")

        >>> inputs = processor(image, return_tensors="pt")
        >>> outputs = model(**inputs)
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        embedding_output = self.embeddings(pixel_values)

        outputs = self.encoder(
            embedding_output,
            output_hidden_states=True,
            return_dict=return_dict,
        )

        hidden_states = outputs.hidden_states if return_dict else outputs[1]

        feature_maps = ()
        # we skip the stem
        for idx, (stage, hidden_state) in enumerate(zip(self.stage_names[1:], hidden_states[1:])):
            if stage in self.out_features:
                hidden_state = self.hidden_states_norms[stage](hidden_state)
                feature_maps += (hidden_state,)

        if not return_dict:
            output = (feature_maps,)
            if output_hidden_states:
                output += (hidden_states,)
            return output

        return BackboneOutput(
            feature_maps=feature_maps,
            hidden_states=hidden_states if output_hidden_states else None,
            attentions=None,
        )