""" PyTorch Neighborhood Attention Transformer model."""

import math
from dataclasses import dataclass
from typing import Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...activations import ACT2FN
from ...modeling_outputs import BackboneOutput
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import (
    ModelOutput,
    OptionalDependencyNotAvailable,
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    is_natten_available,
    logging,
    replace_return_docstrings,
    requires_backends,
)
from ...utils.backbone_utils import BackboneMixin
from .configuration_nat import NatConfig


if is_natten_available():
    from natten.functional import natten2dav, natten2dqkrpb
else:
    # Stand-ins so the module can be imported without the optional `natten` backend;
    # they raise only when the neighborhood-attention kernels are actually called.
    def natten2dav(*args, **kwargs):
        raise OptionalDependencyNotAvailable()

    def natten2dqkrpb(*args, **kwargs):
        raise OptionalDependencyNotAvailable()


logger = logging.get_logger(__name__)

# General docstring
_CONFIG_FOR_DOC = "NatConfig"

# Base docstring
_CHECKPOINT_FOR_DOC = "shi-labs/nat-mini-in1k-224"
_EXPECTED_OUTPUT_SHAPE = [1, 7, 7, 512]

# Image classification docstring
_IMAGE_CLASS_CHECKPOINT = "shi-labs/nat-mini-in1k-224"
_IMAGE_CLASS_EXPECTED_OUTPUT = "tiger cat"


NAT_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "shi-labs/nat-mini-in1k-224",
    # See all Nat models at https://huggingface.co/models?filter=nat
]
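
# A hedged illustration (added for exposition, not part of the upstream file): with
# `natten` missing, the module still imports cleanly and only kernel calls fail.
#
#     try:
#         natten2dqkrpb(query, key, rpb, kernel_size, 1)  # hypothetical call site
#     except OptionalDependencyNotAvailable:
#         ...  # prompt the user to install the `natten` package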
ej  ed< dZe	e
ej  ed< dZe	e
ej  ed< dS )NatEncoderOutputa  
    Nat encoder's outputs, with potential hidden states and attentions.

    Args:
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, hidden_size, height, width)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
            include the spatial dimensions.
    """

    last_hidden_state: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
    reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None


@dataclass
class NatModelOutput(ModelOutput):
    """
    Nat model's outputs that also contains a pooling of the last hidden states.

    Args:
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*, returned when `add_pooling_layer=True` is passed):
            Average pooling of the last layer hidden-state.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, hidden_size, height, width)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
            include the spatial dimensions.
    """

    last_hidden_state: torch.FloatTensor = None
    pooler_output: Optional[torch.FloatTensor] = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
    reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None


@dataclass
class NatImageClassifierOutput(ModelOutput):
    """
    Nat outputs for image classification.

    Args:
        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification (or regression if config.num_labels==1) loss.
        logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, hidden_size, height, width)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
            include the spatial dimensions.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
    reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None


class NatEmbeddings(nn.Module):
    """
    Construct the patch and position embeddings.
    """

    def __init__(self, config):
        super().__init__()

        self.patch_embeddings = NatPatchEmbeddings(config)

        self.norm = nn.LayerNorm(config.embed_dim)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, pixel_values: Optional[torch.FloatTensor]) -> Tuple[torch.Tensor]:
        embeddings = self.patch_embeddings(pixel_values)
        embeddings = self.norm(embeddings)
        embeddings = self.dropout(embeddings)

        return embeddings

class NatPatchEmbeddings(nn.Module):
    """
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, height, width, hidden_size)` to be consumed by a
    Transformer.
    """

    def __init__(self, config):
        super().__init__()
        patch_size = config.patch_size
        num_channels, hidden_size = config.num_channels, config.embed_dim
        self.num_channels = num_channels

        if patch_size == 4:
            pass
        else:
            # TODO: Support arbitrary patch sizes.
            raise ValueError("Nat only supports patch size of 4 at the moment.")

        self.projection = nn.Sequential(
            nn.Conv2d(self.num_channels, hidden_size // 2, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
            nn.Conv2d(hidden_size // 2, hidden_size, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
        )

    def forward(self, pixel_values: Optional[torch.FloatTensor]) -> torch.Tensor:
        _, num_channels, height, width = pixel_values.shape
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
            )
        embeddings = self.projection(pixel_values)
        embeddings = embeddings.permute(0, 2, 3, 1)

        return embeddings


class NatDownsampler(nn.Module):
    """

    Args:
        dim (`int`):
            Number of input channels.
        norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
            Normalization layer class.
    N)dim
norm_layerrI   c                    s>   t    || _tj|d| ddddd| _|d| | _d S )NrO   rP   rQ   rR   F)rS   rT   rU   bias)r8   r9   ra   r   rY   	reductionr>   )rC   ra   rb   rE   r#   r$   r9      s    
zNatDownsampler.__init__)input_featurerI   c                 C   s0   |  |dddddddd}| |}|S )Nr   r
   r   rO   )rd   r\   r>   )rC   re   r#   r#   r$   rK     s    "
zNatDownsampler.forward)r+   r,   r-   r.   r   r<   intModuler9   r/   rL   rK   rM   r#   r#   rE   r$   r`      s   
r`           F)input	drop_probtrainingrI   c                 C   sd   |dks|s| S d| }| j d fd| jd   }|tj|| j| jd }|  | || }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    """
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1 - drop_prob
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize
    output = input.div(keep_prob) * random_tensor
    return output
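
# Illustrative sketch (added for exposition; `_example_drop_path_expectation` is a
# hypothetical helper, not part of the original module): stochastic depth zeroes the
# residual branch for a random subset of samples and rescales the survivors by
# 1 / keep_prob, so the expected activation is unchanged.
def _example_drop_path_expectation():
    batch = torch.ones(10000, 3)
    dropped = drop_path(batch, drop_prob=0.2, training=True)
    # Each row is either all zeros or all 1 / 0.8 = 1.25; the overall mean stays ~1.0.
    return dropped.mean().item()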

class NatDropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob: Optional[float] = None) -> None:
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return drop_path(hidden_states, self.drop_prob, self.training)

    def extra_repr(self) -> str:
        return "p={}".format(self.drop_prob)

class NeighborhoodAttention(nn.Module):
    def __init__(self, config, dim, num_heads, kernel_size):
        super().__init__()
        if dim % num_heads != 0:
            raise ValueError(
                f"The hidden size ({dim}) is not a multiple of the number of attention heads ({num_heads})"
            )

        self.num_attention_heads = num_heads
        self.attention_head_size = int(dim / num_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size
        self.kernel_size = kernel_size

        # rpb is learnable relative positional biases; the same concept is used in Swin.
        self.rpb = nn.Parameter(torch.zeros(num_heads, (2 * self.kernel_size - 1), (2 * self.kernel_size - 1)))

        self.query = nn.Linear(self.all_head_size, self.all_head_size, bias=config.qkv_bias)
        self.key = nn.Linear(self.all_head_size, self.all_head_size, bias=config.qkv_bias)
        self.value = nn.Linear(self.all_head_size, self.all_head_size, bias=config.qkv_bias)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

    def transpose_for_scores(self, x):
        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        x = x.view(new_x_shape)
        return x.permute(0, 3, 1, 2, 4)

    def forward(
        self,
        hidden_states: torch.Tensor,
        output_attentions: Optional[bool] = False,
    ) -> Tuple[torch.Tensor]:
        query_layer = self.transpose_for_scores(self.query(hidden_states))
        key_layer = self.transpose_for_scores(self.key(hidden_states))
        value_layer = self.transpose_for_scores(self.value(hidden_states))

        # Apply the scale factor before computing attention weights. It's usually more efficient because the
        # attention weights are typically a bigger tensor compared to query. It gives identical results because
        # scalars are commutable in matrix multiplication.
        query_layer = query_layer / math.sqrt(self.attention_head_size)

        # Compute NA between "query" and "key" to get the raw attention scores, and add the relative positional biases.
        attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, self.kernel_size, 1)

        # Normalize the attention scores to probabilities.
        attention_probs = nn.functional.softmax(attention_scores, dim=-1)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs = self.dropout(attention_probs)

        context_layer = natten2dav(attention_probs, value_layer, self.kernel_size, 1)
        context_layer = context_layer.permute(0, 2, 3, 1, 4).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(new_context_layer_shape)

        outputs = (context_layer, attention_probs) if output_attentions else (context_layer,)

        return outputs


class NeighborhoodAttentionOutput(nn.Module):
    def __init__(self, config, dim):
        super().__init__()
        self.dense = nn.Linear(dim, dim)
        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)

        return hidden_states

class NeighborhoodAttentionModule(nn.Module):
    def __init__(self, config, dim, num_heads, kernel_size):
        super().__init__()
        self.self = NeighborhoodAttention(config, dim, num_heads, kernel_size)
        self.output = NeighborhoodAttentionOutput(config, dim)
        self.pruned_heads = set()

    def prune_heads(self, heads):
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.self.num_attention_heads, self.self.attention_head_size, self.pruned_heads
        )

        # Prune linear layers
        self.self.query = prune_linear_layer(self.self.query, index)
        self.self.key = prune_linear_layer(self.self.key, index)
        self.self.value = prune_linear_layer(self.self.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # Update hyper params and store pruned heads
        self.self.num_attention_heads = self.self.num_attention_heads - len(heads)
        self.self.all_head_size = self.self.attention_head_size * self.self.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self,
        hidden_states: torch.Tensor,
        output_attentions: Optional[bool] = False,
    ) -> Tuple[torch.Tensor]:
        self_outputs = self.self(hidden_states, output_attentions)
        attention_output = self.output(self_outputs[0], hidden_states)
        outputs = (attention_output,) + self_outputs[1:]  # add attentions if we output them
        return outputs


class NatIntermediate(nn.Module):
    def __init__(self, config, dim):
        super().__init__()
        self.dense = nn.Linear(dim, int(config.mlp_ratio * dim))
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states


class NatOutput(nn.Module):
    def __init__(self, config, dim):
        super().__init__()
        self.dense = nn.Linear(int(config.mlp_ratio * dim), dim)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        return hidden_states


class NatLayer(nn.Module):
    def __init__(self, config, dim, num_heads, drop_path_rate=0.0):
        super().__init__()
        self.chunk_size_feed_forward = config.chunk_size_feed_forward
        self.kernel_size = config.kernel_size
        self.layernorm_before = nn.LayerNorm(dim, eps=config.layer_norm_eps)
        self.attention = NeighborhoodAttentionModule(config, dim, num_heads, kernel_size=self.kernel_size)
        self.drop_path = NatDropPath(drop_path_rate) if drop_path_rate > 0.0 else nn.Identity()
        self.layernorm_after = nn.LayerNorm(dim, eps=config.layer_norm_eps)
        self.intermediate = NatIntermediate(config, dim)
        self.output = NatOutput(config, dim)
        self.layer_scale_parameters = (
            nn.Parameter(config.layer_scale_init_value * torch.ones((2, dim)), requires_grad=True)
            if config.layer_scale_init_value > 0
            else None
        )

    def maybe_pad(self, hidden_states, height, width):
        window_size = self.kernel_size
        pad_values = (0, 0, 0, 0, 0, 0)
        if height < window_size or width < window_size:
            pad_l = pad_t = 0
            pad_r = max(0, window_size - width)
            pad_b = max(0, window_size - height)
            pad_values = (0, 0, pad_l, pad_r, pad_t, pad_b)
            hidden_states = nn.functional.pad(hidden_states, pad_values)
        return hidden_states, pad_values

    def forward(
        self,
        hidden_states: torch.Tensor,
        output_attentions: Optional[bool] = False,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        batch_size, height, width, channels = hidden_states.size()
        shortcut = hidden_states

        hidden_states = self.layernorm_before(hidden_states)
        # pad hidden_states if they are smaller than kernel size
        hidden_states, pad_values = self.maybe_pad(hidden_states, height, width)

        _, height_pad, width_pad, _ = hidden_states.shape

        attention_outputs = self.attention(hidden_states, output_attentions=output_attentions)

        attention_output = attention_outputs[0]

        was_padded = pad_values[3] > 0 or pad_values[5] > 0
        if was_padded:
            attention_output = attention_output[:, :height, :width, :].contiguous()

        if self.layer_scale_parameters is not None:
            attention_output = self.layer_scale_parameters[0] * attention_output

        hidden_states = shortcut + self.drop_path(attention_output)

        layer_output = self.layernorm_after(hidden_states)
        layer_output = self.output(self.intermediate(layer_output))

        if self.layer_scale_parameters is not None:
            layer_output = self.layer_scale_parameters[1] * layer_output

        layer_output = hidden_states + self.drop_path(layer_output)

        layer_outputs = (layer_output, attention_outputs[1]) if output_attentions else (layer_output,)
        return layer_outputs


class NatStage(nn.Module):
    def __init__(self, config, dim, depth, num_heads, drop_path_rate, downsample):
        super().__init__()
        self.config = config
        self.dim = dim
        self.layers = nn.ModuleList(
            [
                NatLayer(
                    config=config,
                    dim=dim,
                    num_heads=num_heads,
                    drop_path_rate=drop_path_rate[i],
                )
                for i in range(depth)
            ]
        )

        # patch merging layer
        if downsample is not None:
            self.downsample = downsample(dim=dim, norm_layer=nn.LayerNorm)
        else:
            self.downsample = None

        self.pointing = False

    def forward(
        self,
        hidden_states: torch.Tensor,
        output_attentions: Optional[bool] = False,
    ) -> Tuple[torch.Tensor]:
        _, height, width, _ = hidden_states.size()
        for i, layer_module in enumerate(self.layers):
            layer_outputs = layer_module(hidden_states, output_attentions)
            hidden_states = layer_outputs[0]

        hidden_states_before_downsampling = hidden_states
        if self.downsample is not None:
            hidden_states = self.downsample(hidden_states_before_downsampling)

        stage_outputs = (hidden_states, hidden_states_before_downsampling)

        if output_attentions:
            stage_outputs += layer_outputs[1:]
        return stage_outputs


class NatEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.num_levels = len(config.depths)
        self.config = config
        dpr = [x.item() for x in torch.linspace(0, config.drop_path_rate, sum(config.depths))]
        self.levels = nn.ModuleList(
            [
                NatStage(
                    config=config,
                    dim=int(config.embed_dim * 2**i_layer),
                    depth=config.depths[i_layer],
                    num_heads=config.num_heads[i_layer],
                    drop_path_rate=dpr[sum(config.depths[:i_layer]) : sum(config.depths[: i_layer + 1])],
                    downsample=NatDownsampler if (i_layer < self.num_levels - 1) else None,
                )
                for i_layer in range(self.num_levels)
            ]
        )

    def forward(
        self,
        hidden_states: torch.Tensor,
        output_attentions: Optional[bool] = False,
        output_hidden_states: Optional[bool] = False,
        output_hidden_states_before_downsampling: Optional[bool] = False,
        return_dict: Optional[bool] = True,
    ) -> Union[Tuple, NatEncoderOutput]:
        all_hidden_states = () if output_hidden_states else None
        all_reshaped_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None

        if output_hidden_states:
            # rearrange b h w c -> b c h w
            reshaped_hidden_state = hidden_states.permute(0, 3, 1, 2)
            all_hidden_states += (hidden_states,)
            all_reshaped_hidden_states += (reshaped_hidden_state,)

        for i, layer_module in enumerate(self.levels):
            layer_outputs = layer_module(hidden_states, output_attentions)

            hidden_states = layer_outputs[0]
            hidden_states_before_downsampling = layer_outputs[1]

            if output_hidden_states and output_hidden_states_before_downsampling:
                # rearrange b h w c -> b c h w
                reshaped_hidden_state = hidden_states_before_downsampling.permute(0, 3, 1, 2)
                all_hidden_states += (hidden_states_before_downsampling,)
                all_reshaped_hidden_states += (reshaped_hidden_state,)
            elif output_hidden_states and not output_hidden_states_before_downsampling:
                # rearrange b h w c -> b c h w
                reshaped_hidden_state = hidden_states.permute(0, 3, 1, 2)
                all_hidden_states += (hidden_states,)
                all_reshaped_hidden_states += (reshaped_hidden_state,)

            if output_attentions:
                all_self_attentions += layer_outputs[2:]

        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states, all_self_attentions] if v is not None)

        return NatEncoderOutput(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
            reshaped_hidden_states=all_reshaped_hidden_states,
        )

class NatPreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config_class = NatConfig
    base_model_prefix = "nat"
    main_input_name = "pixel_values"

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def _set_gradient_checkpointing(self, module: NatEncoder, value: bool = False) -> None:
        pass


NAT_START_DOCSTRING = r"""
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`NatConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
"""

NAT_INPUTS_DOCSTRING = r"""
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See [`ViTImageProcessor.__call__`]
            for details.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""


@add_start_docstrings(
    "The bare Nat Model transformer outputting raw hidden-states without any specific head on top.",
    NAT_START_DOCSTRING,
)
class NatModel(NatPreTrainedModel):
    def __init__(self, config, add_pooling_layer=True):
        super().__init__(config)

        requires_backends(self, ["natten"])

        self.config = config
        self.num_levels = len(config.depths)
        self.num_features = int(config.embed_dim * 2 ** (self.num_levels - 1))

        self.embeddings = NatEmbeddings(config)
        self.encoder = NatEncoder(config)

        self.layernorm = nn.LayerNorm(self.num_features, eps=config.layer_norm_eps)
        self.pooler = nn.AdaptiveAvgPool1d(1) if add_pooling_layer else None

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.embeddings.patch_embeddings

    def _prune_heads(self, heads_to_prune):
        """
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr   layerr   r   )rC   Zheads_to_pruner   r   r#   r#   r$   _prune_heads  s    zNatModel._prune_headsZvision)
checkpointoutput_typer   Zmodalityexpected_outputN)rH   r   r   r   rI   c           
      C   s   |d k	r|n| j j}|d k	r |n| j j}|d k	r4|n| j j}|d krLtd| |}| j||||d}|d }| |}d }| jd k	r| |	dd
dd}t	|d}|s||f|dd   }	|	S t|||j|j|jdS )Nz You have to specify pixel_valuesr   r   r   r   r   rO   )r'   r3   r(   r)   r*   )rD   r   r   use_return_dictrX   rJ   r   r   r   flattenZ	transposer/   r2   r(   r)   r*   )
rC   rH   r   r   r   embedding_outputZencoder_outputsZsequence_outputpooled_outputrp   r#   r#   r$   rK     s:    


zNatModel.forward)T)NNNN)r+   r,   r-   r9   r   r   r   NAT_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr2   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr   r/   r0   r   r   r   rK   rM   r#   r#   rE   r$   r     s,   	    
r   z
    Nat Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    """,
    NAT_START_DOCSTRING,
)
class NatForImageClassification(NatPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        requires_backends(self, ["natten"])

        self.num_labels = config.num_labels
        self.nat = NatModel(config)

        # Classifier head
        self.classifier = (
            nn.Linear(self.nat.num_features, config.num_labels) if config.num_labels > 0 else nn.Identity()
        )

        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(NAT_INPUTS_DOCSTRING)
    @add_code_sample_docstrings(
        checkpoint=_IMAGE_CLASS_CHECKPOINT,
        output_type=NatImageClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
        expected_output=_IMAGE_CLASS_EXPECTED_OUTPUT,
    )
    def forward(
        self,
        pixel_values: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, NatImageClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.nat(
            pixel_values,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        pooled_output = outputs[1]

        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return NatImageClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
            reshaped_hidden_states=outputs.reshaped_hidden_states,
        )
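
# A hedged usage sketch (added for exposition; assumes the public
# `shi-labs/nat-mini-in1k-224` checkpoint, a PIL `image`, and an installed `natten`):
#
#     from transformers import AutoImageProcessor
#
#     processor = AutoImageProcessor.from_pretrained("shi-labs/nat-mini-in1k-224")
#     model = NatForImageClassification.from_pretrained("shi-labs/nat-mini-in1k-224")
#     inputs = processor(image, return_tensors="pt")
#     with torch.no_grad():
#         logits = model(**inputs).logits
#     predicted_label = model.config.id2label[logits.argmax(-1).item()]  # e.g. "tiger cat"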
e
jee ee ee eddd	Z  ZS )NatBackbonec                    s   t    t    t| dg t | _t | _ jg fddt	t
 jD  | _i }t| j| jD ]\}}t|||< qpt|| _|   d S )Nr   c                    s   g | ]}t  jd |  qS )rO   )rf   r=   r   rD   r#   r$   r   f  s     z(NatBackbone.__init__.<locals>.<listcomp>)r8   r9   Z_init_backboner   r7   rJ   r   r   r=   r   r   r   r   zipout_featuresr   r   r<   Z
ModuleDicthidden_states_normsr   )rC   rD   r  stagerW   rE   r  r$   r9   ^  s    

&zNatBackbone.__init__c                 C   s   | j jS r   r   rv   r#   r#   r$   r   q  s    z NatBackbone.get_input_embeddings)r   r   N)rH   r   r   r   rI   c                 C   s2  |dk	r|n| j j}|dk	r |n| j j}|dk	r4|n| j j}| |}| j||dddd}|j}d}t| j|D ]\}	}
|	| j	krp|
j
\}}}}|
dddd }
|
||| |}
| j|	 |
}
|
||||}
|
dddd }
||
f7 }qp|s|f}|r||jf7 }|S t||r&|jnd|jd	S )
aA  
        Returns:

        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("shi-labs/nat-mini-in1k-224")
        >>> model = AutoBackbone.from_pretrained(
        ...     "shi-labs/nat-mini-in1k-224", out_features=["stage1", "stage2", "stage3", "stage4"]
        ... )

        >>> inputs = processor(image, return_tensors="pt")

        >>> outputs = model(**inputs)

        >>> feature_maps = outputs.feature_maps
        >>> list(feature_maps[-1].shape)
        [1, 512, 7, 7]
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

        embedding_output = self.embeddings(pixel_values)

        outputs = self.encoder(
            embedding_output,
            output_attentions=output_attentions,
            output_hidden_states=True,
            output_hidden_states_before_downsampling=True,
            return_dict=True,
        )

        hidden_states = outputs.reshaped_hidden_states

        feature_maps = ()
        for stage, hidden_state in zip(self.stage_names, hidden_states):
            if stage in self.out_features:
                batch_size, num_channels, height, width = hidden_state.shape
                # Flatten to (batch_size, height * width, num_channels) for the layer norm,
                # then restore the (batch_size, num_channels, height, width) layout.
                hidden_state = hidden_state.permute(0, 2, 3, 1).contiguous()
                hidden_state = hidden_state.view(batch_size, height * width, num_channels)
                hidden_state = self.hidden_states_norms[stage](hidden_state)
                hidden_state = hidden_state.view(batch_size, height, width, num_channels)
                hidden_state = hidden_state.permute(0, 3, 1, 2).contiguous()
                feature_maps += (hidden_state,)

        if not return_dict:
            output = (feature_maps,)
            if output_hidden_states:
                output += (outputs.hidden_states,)
            return output

        return BackboneOutput(
            feature_maps=feature_maps,
            hidden_states=outputs.hidden_states if output_hidden_states else None,
            attentions=outputs.attentions,
        )