U
    ,-e&                     @   s  d Z ddlZddlZddlmZ ddlmZmZm	Z	 ddl
Z
ddlZ
ddl
mZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZmZ ddlmZmZmZmZmZmZ ddlm Z  e!e"Z#dZ$dZ%ddddgZ&dgZ'eG dd deZ(dd Z)dd Z*dLe
j+e,e-e
j+dddZ.G dd dej/Z0G d d! d!ej/Z1G d"d# d#ej/Z2G d$d% d%ej/Z3G d&d' d'ej/Z4G d(d) d)ej/Z5G d*d+ d+ej/Z6G d,d- d-ej/Z7G d.d/ d/ej/Z8G d0d1 d1ej/Z9G d2d3 d3ej/Z:G d4d5 d5ej/Z;G d6d7 d7ej/Z<G d8d9 d9eZ=d:Z>d;Z?ed<e>G d=d> d>e=Z@G d?d@ d@ej/ZAG dAdB dBej/ZBG dCdD dDej/ZCG dEdF dFej/ZDG dGdH dHej/ZEedIe>G dJdK dKe=ZFdS )Mz# PyTorch Swin2SR Transformer model.    N)	dataclass)OptionalTupleUnion)nn   )ACT2FN)BaseModelOutputImageSuperResolutionOutput)PreTrainedModel) find_pruneable_heads_and_indicesmeshgridprune_linear_layer)ModelOutputadd_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )Swin2SRConfigr   z!caidas/swin2SR-classical-sr-x2-64   i  i  c                   @   sL   e Zd ZU dZdZejed< dZe	e
ej  ed< dZe	e
ej  ed< dS )Swin2SREncoderOutputa  
    Swin2SR encoder's outputs, with potential hidden states and attentions.

    Args:
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nlast_hidden_statehidden_states
attentions)__name__
__module____qualname____doc__r   torchFloatTensor__annotations__r   r   r   r    r#   r#   m/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/transformers/models/swin2sr/modeling_swin2sr.pyr   :   s   
r   c                 C   sR   | j \}}}}| ||| ||| ||} | dddddd d|||}|S )z2
    Partitions the given input into windows.
    r   r   r            shapeviewpermute
contiguous)input_featurewindow_size
batch_sizeheightwidthnum_channelswindowsr#   r#   r$   window_partitionU   s         $r5   c                 C   sN   | j d }| d|| || |||} | dddddd d|||} | S )z?
    Merges windows to produce higher resolution features.
    r(   r   r   r   r%   r&   r'   r)   )r4   r/   r1   r2   r3   r#   r#   r$   window_reverseb   s    
$r6           F)input	drop_probtrainingreturnc                 C   sd   |dks|s| S d| }| j d fd| jd   }|tj|| j| jd }|  | || }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    r7   r   r   )r   )dtypedevice)r*   ndimr    Zrandr<   r=   Zfloor_div)r8   r9   r:   Z	keep_probr*   Zrandom_tensoroutputr#   r#   r$   	drop_pathm   s    
rA   c                       sP   e Zd ZdZdee dd fddZejejdddZ	e
d	d
dZ  ZS )Swin2SRDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).N)r9   r;   c                    s   t    || _d S N)super__init__r9   )selfr9   	__class__r#   r$   rE      s    
zSwin2SRDropPath.__init__r   r;   c                 C   s   t || j| jS rC   )rA   r9   r:   rF   r   r#   r#   r$   forward   s    zSwin2SRDropPath.forward)r;   c                 C   s   d | jS )Nzp={})formatr9   rF   r#   r#   r$   
extra_repr   s    zSwin2SRDropPath.extra_repr)N)r   r   r   r   r   floatrE   r    TensorrK   strrN   __classcell__r#   r#   rG   r$   rB      s   rB   c                       s<   e Zd ZdZ fddZeej eej	 dddZ
  ZS )Swin2SREmbeddingsz?
    Construct the patch and optional position embeddings.
    c                    s`   t    t|| _| jj}|jr@tt	d|d |j
| _nd | _t|j| _|j| _d S )Nr   )rD   rE   Swin2SRPatchEmbeddingspatch_embeddingsnum_patchesZuse_absolute_embeddingsr   	Parameterr    zeros	embed_dimposition_embeddingsDropouthidden_dropout_probdropoutr/   )rF   configrV   rG   r#   r$   rE      s    

zSwin2SREmbeddings.__init__)pixel_valuesr;   c                 C   s4   |  |\}}| jd k	r"|| j }| |}||fS rC   )rU   rZ   r]   )rF   r_   
embeddingsoutput_dimensionsr#   r#   r$   rK      s
    


zSwin2SREmbeddings.forward)r   r   r   r   rE   r   r    r!   r   rP   rK   rR   r#   r#   rG   r$   rS      s   rS   c                       sB   e Zd Zd fdd	Zeej eejee	 f dddZ
  ZS )rT   Tc                    s   t    |j}|j|j }}t|tjjr0|n||f}t|tjjrJ|n||f}|d |d  |d |d  g}|| _	|d |d  | _
tj||j||d| _|rt|jnd | _d S )Nr   r   )Zkernel_sizeZstride)rD   rE   rY   
image_size
patch_size
isinstancecollectionsabcIterablepatches_resolutionrV   r   Conv2d
projection	LayerNorm	layernorm)rF   r^   normalize_patchesr3   rb   rc   rh   rG   r#   r$   rE      s    
 zSwin2SRPatchEmbeddings.__init__)r`   r;   c                 C   sN   |  |}|j\}}}}||f}|ddd}| jd k	rF| |}||fS )Nr%   r   )rj   r*   flatten	transposerl   )rF   r`   _r1   r2   ra   r#   r#   r$   rK      s    


zSwin2SRPatchEmbeddings.forward)T)r   r   r   rE   r   r    r!   r   rP   intrK   rR   r#   r#   rG   r$   rT      s   rT   c                       s(   e Zd ZdZ fddZdd Z  ZS )Swin2SRPatchUnEmbeddingszImage to Patch Unembeddingc                    s   t    |j| _d S rC   )rD   rE   rY   )rF   r^   rG   r#   r$   rE      s    
z!Swin2SRPatchUnEmbeddings.__init__c                 C   s2   |j \}}}|dd|| j|d |d }|S )Nr   r%   r   )r*   ro   r+   rY   )rF   r`   Zx_sizer0   Zheight_widthr3   r#   r#   r$   rK      s    "z Swin2SRPatchUnEmbeddings.forwardr   r   r   r   rE   rK   rR   r#   r#   rG   r$   rr      s   rr   c                       s^   e Zd ZdZejfee eejdd fddZ	dd Z
ejeeef ejdd	d
Z  ZS )Swin2SRPatchMerginga'  
    Patch Merging Layer.

    Args:
        input_resolution (`Tuple[int]`):
            Resolution of input feature.
        dim (`int`):
            Number of input channels.
        norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
            Normalization layer class.
    N)input_resolutiondim
norm_layerr;   c                    sB   t    || _|| _tjd| d| dd| _|d| | _d S )Nr&   r%   Fbias)rD   rE   ru   rv   r   Linear	reductionnorm)rF   ru   rv   rw   rG   r#   r$   rE      s
    
zSwin2SRPatchMerging.__init__c                 C   sF   |d dkp|d dk}|rBddd|d d|d f}t j||}|S )Nr%   r   r   )r   
functionalpad)rF   r.   r1   r2   Z
should_pad
pad_valuesr#   r#   r$   	maybe_pad   s
    zSwin2SRPatchMerging.maybe_pad)r.   input_dimensionsr;   c                 C   s   |\}}|j \}}}|||||}| |||}|d d dd ddd dd d f }|d d dd ddd dd d f }	|d d dd ddd dd d f }
|d d dd ddd dd d f }t||	|
|gd}||dd| }| |}| |}|S )Nr   r%   r   r(   r&   )r*   r+   r   r    catr{   r|   )rF   r.   r   r1   r2   r0   rv   r3   Zinput_feature_0Zinput_feature_1Zinput_feature_2Zinput_feature_3r#   r#   r$   rK      s    $$$$

zSwin2SRPatchMerging.forward)r   r   r   r   r   rk   r   rq   ModulerE   r   r    rP   rK   rR   r#   r#   rG   r$   rt      s   $rt   c                       s\   e Zd Zddgf fdd	Zdd Zdejeej eej ee	 e
ej dd	d
Z  ZS )Swin2SRSelfAttentionr   c              
      s  t    || dkr,td| d| d|| _t|| | _| j| j | _t|tj	j
r`|n||f| _|| _ttdt|ddf | _ttjddd	d
tjd	dtjd|dd
| _tj| jd d  | jd tjd}tj| jd d  | jd tjd}tt||gddddd d}|d dkr|d d d d d d df  |d d   < |d d d d d d df  |d d   < n\|d d d d d d df  | jd d   < |d d d d d d df  | jd d   < |d9 }t|tt |d  t!d }| j"d|dd t| jd }	t| jd }
tt|	|
gdd}t#|d}|d d d d d f |d d d d d f  }|ddd }|d d d d df  | jd d 7  < |d d d d df  | jd d 7  < |d d d d df  d| jd  d 9  < |$d}| j"d|dd tj| j| j|j%d
| _&tj| j| jdd
| _'tj| j| j|j%d
| _(t)|j*| _+d S )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()
   r   r%   i   Trx   inplaceFr<   Zij)Zindexing         ?relative_coords_table)
persistentr(   relative_position_index),rD   rE   
ValueErrornum_attention_headsrq   attention_head_sizeall_head_sizerd   re   rf   rg   r/   pretrained_window_sizer   rW   r    logZoneslogit_scale
Sequentialrz   ZReLUcontinuous_position_bias_mlpZarangeZfloat32stackr   r,   r-   	unsqueezesignlog2absmathZregister_bufferrn   sumZqkv_biasquerykeyvaluer[   attention_probs_dropout_probr]   )rF   r^   rv   	num_headsr/   r   Zrelative_coords_hZrelative_coords_wr   Zcoords_hZcoords_wZcoordsZcoords_flattenZrelative_coordsr   rG   r#   r$   rE     sf    
" 
 $$  ,...&,((,
zSwin2SRSelfAttention.__init__c                 C   s6   |  d d | j| jf }||}|ddddS )Nr(   r   r%   r   r   )sizer   r   r+   r,   )rF   xZnew_x_shaper#   r#   r$   transpose_for_scoresK  s    
z)Swin2SRSelfAttention.transpose_for_scoresNFr   attention_mask	head_maskoutput_attentionsr;   c                 C   s  |j \}}}| |}| | |}	| | |}
| |}tjj|ddtjj|	dddd }t	j
| jtdd }|| }| | jd| j}|| jd | jd | jd  | jd | jd  d}|ddd }d	t	| }||d }|d k	rj|j d }||| || j|||dd }||dd }|d| j||}tjj|dd}| |}|d k	r|| }t	||
}|dddd
 }| d d | jf }||}|r||fn|f}|S )Nr(   rv   g      Y@)maxr   r   r%      r   )r*   r   r   r   r   r   r}   	normalizero   r    clampr   r   r   expr   r   r+   r   r   r/   r,   r-   Zsigmoidr   Zsoftmaxr]   matmulr   r   )rF   r   r   r   r   r0   rv   r3   Zmixed_query_layerZ	key_layerZvalue_layerZquery_layerZattention_scoresr   Zrelative_position_bias_tableZrelative_position_biasZ
mask_shapeZattention_probsZcontext_layerZnew_context_layer_shapeoutputsr#   r#   r$   rK   P  sb    

     

    


zSwin2SRSelfAttention.forward)NNF)r   r   r   rE   r   r    rP   r   r!   boolr   rK   rR   r#   r#   rG   r$   r     s   ;   r   c                       s4   e Zd Z fddZejejejdddZ  ZS )Swin2SRSelfOutputc                    s*   t    t||| _t|j| _d S rC   )rD   rE   r   rz   denser[   r   r]   rF   r^   rv   rG   r#   r$   rE     s    
zSwin2SRSelfOutput.__init__)r   input_tensorr;   c                 C   s   |  |}| |}|S rC   r   r]   )rF   r   r   r#   r#   r$   rK     s    

zSwin2SRSelfOutput.forwardr   r   r   rE   r    rP   rK   rR   r#   r#   rG   r$   r     s   r   c                       sV   e Zd Zd fdd	Zdd Zdejeej eej ee	 e
ej dd	d
Z  ZS )Swin2SRAttentionr   c                    sL   t    t||||t|tjjr&|n||fd| _t||| _	t
 | _d S )Nr^   rv   r   r/   r   )rD   rE   r   rd   re   rf   rg   rF   r   r@   setpruned_heads)rF   r^   rv   r   r/   r   rG   r#   r$   rE     s    
	zSwin2SRAttention.__init__c                 C   s   t |dkrd S t|| jj| jj| j\}}t| jj|| j_t| jj|| j_t| jj	|| j_	t| j
j|dd| j
_| jjt | | j_| jj| jj | j_| j|| _d S )Nr   r   r   )lenr   rF   r   r   r   r   r   r   r   r@   r   r   union)rF   headsindexr#   r#   r$   prune_heads  s       zSwin2SRAttention.prune_headsNFr   c                 C   s6   |  ||||}| |d |}|f|dd   }|S Nr   r   )rF   r@   )rF   r   r   r   r   Zself_outputsattention_outputr   r#   r#   r$   rK     s    zSwin2SRAttention.forward)r   )NNF)r   r   r   rE   r   r    rP   r   r!   r   r   rK   rR   r#   r#   rG   r$   r     s      r   c                       s0   e Zd Z fddZejejdddZ  ZS )Swin2SRIntermediatec                    sH   t    t|t|j| | _t|jt	r<t
|j | _n|j| _d S rC   )rD   rE   r   rz   rq   	mlp_ratior   rd   Z
hidden_actrQ   r   intermediate_act_fnr   rG   r#   r$   rE     s
    
zSwin2SRIntermediate.__init__rI   c                 C   s   |  |}| |}|S rC   )r   r   rJ   r#   r#   r$   rK     s    

zSwin2SRIntermediate.forwardr   r#   r#   rG   r$   r     s   r   c                       s0   e Zd Z fddZejejdddZ  ZS )Swin2SROutputc                    s4   t    tt|j| || _t|j| _	d S rC   )
rD   rE   r   rz   rq   r   r   r[   r\   r]   r   rG   r#   r$   rE     s    
zSwin2SROutput.__init__rI   c                 C   s   |  |}| |}|S rC   r   rJ   r#   r#   r$   rK     s    

zSwin2SROutput.forwardr   r#   r#   rG   r$   r     s   r   c                	       st   e Zd Zd fdd	Zdd Zdd Zdd	 Zdeje	e
e
f eej ee ee e	ejejf dddZ  ZS )Swin2SRLayerr   c                    s   t    |j| _|| _|j| _|| _| | t|||| jt|t	j
jrN|n||fd| _tj||jd| _|jdkrt|jnt | _t||| _t||| _tj||jd| _d S )Nr   Zepsr7   )rD   rE   Zchunk_size_feed_forward
shift_sizer/   ru   set_shift_and_window_sizer   rd   re   rf   rg   	attentionr   rk   layer_norm_epslayernorm_beforedrop_path_raterB   ZIdentityrA   r   intermediater   r@   layernorm_after)rF   r^   rv   ru   r   r   r   rG   r#   r$   rE     s(    

	zSwin2SRLayer.__init__c                 C   s   t | jtjjr| jn
| j| jf}t | jtjjr8| jn
| j| jf}t|d r^|d  n|d }||d krv|n|d | _|t | jtjjr| jn
| j| jfkrdn|d | _d S Nr   )	rd   r/   re   rf   rg   r   r    Z	is_tensoritem)rF   ru   Ztarget_window_sizeZtarget_shift_sizeZ
window_dimr#   r#   r$   r      s$    

"
z&Swin2SRLayer.set_shift_and_window_sizec              	   C   s  | j dkrtjd||df|d}td| j t| j | j  t| j  d f}td| j t| j | j  t| j  d f}d}|D ].}|D ]$}	||d d ||	d d f< |d7 }qqt|| j}
|
d| j| j }
|
d|
d }||dkt	d|dkt	d}nd }|S )Nr   r   r   r(   r%   g      Yr7   )
r   r    rX   slicer/   r5   r+   r   Zmasked_fillrO   )rF   r1   r2   r<   Zimg_maskZheight_slicesZwidth_slicescountZheight_sliceZwidth_sliceZmask_windows	attn_maskr#   r#   r$   get_attn_mask  s*    &zSwin2SRLayer.get_attn_maskc                 C   sR   | j || j   | j  }| j || j   | j  }ddd|d|f}tj||}||fS r   )r/   r   r}   r~   )rF   r   r1   r2   	pad_rightZ
pad_bottomr   r#   r#   r$   r   4  s
    zSwin2SRLayer.maybe_padNF)r   r   r   r   always_partitionr;   c                 C   s  |s|  | n |\}}| \}}	}
|}|||||
}| |||\}}|j\}	}}}	| jdkrtj|| j | j fdd}n|}t|| j	}|d| j	| j	 |
}| j
|||jd}|d k	r||j}| j||||d}|d }|d| j	| j	|
}t|| j	||}| jdkr2tj|| j| jfdd}n|}|d dkpN|d dk}|rz|d d d |d |d d f  }|||| |
}| |}|| | }| |}| |}|| | | }|r||d	 fn|f}|S )
Nr   )r   r%   )ZshiftsZdimsr(   r   r   r   r'   r   )r   r   r+   r   r*   r   r    Zrollr5   r/   r   r<   tor=   r   r6   r-   r   rA   r   r@   r   )rF   r   r   r   r   r   r1   r2   r0   rp   ZchannelsZshortcutr   Z
height_padZ	width_padZshifted_hidden_statesZhidden_states_windowsr   Zattention_outputsr   Zattention_windowsZshifted_windowsZ
was_paddedZlayer_outputlayer_outputsr#   r#   r$   rK   ;  sN    
   $


zSwin2SRLayer.forward)r   r   )NFF)r   r   r   rE   r   r   r   r    rP   r   rq   r   r!   r   rK   rR   r#   r#   rG   r$   r     s      
r   c                       sT   e Zd ZdZd
 fdd	Zdejeeef e	ej
 e	e eej ddd	Z  ZS )Swin2SRStagezh
    This corresponds to the Residual Swin Transformer Block (RSTB) in the original implementation.
    r   c                    s   t     | _| _t fddt|D | _ jdkr\t	ddd| _
nl jdkrtt	d dddtjdd	d
t	d d dddtjdd	d
t	d ddd| _
t dd| _t | _d S )Nc              
      s6   g | ].}t  |d  dkr"dn jd  dqS )r%   r   )r^   rv   ru   r   r   r   )r   r/   ).0ir^   rv   ru   r   r   r#   r$   
<listcomp>  s   	z)Swin2SRStage.__init__.<locals>.<listcomp>Z1convr   r   Z3convr&   皙?TZnegative_sloper   r   F)rm   )rD   rE   r^   rv   r   
ModuleListrangelayersZresi_connectionri   convr   	LeakyReLUrT   patch_embedrr   patch_unembed)rF   r^   rv   ru   depthr   rA   r   rG   r   r$   rE     s(    
	

zSwin2SRStage.__init__NF)r   r   r   r   r;   c                 C   s   |}|\}}t | jD ]2\}}	|d k	r.|| nd }
|	|||
|}|d }q||||f}| ||}| |}| |\}}|| }||f}|r||dd  7 }|S r   )	enumerater   r   r   r   )rF   r   r   r   r   Zresidualr1   r2   r   Zlayer_modulelayer_head_maskr   ra   rp   Zstage_outputsr#   r#   r$   rK     s    

zSwin2SRStage.forward)r   )NF)r   r   r   r   rE   r    rP   r   rq   r   r!   r   rK   rR   r#   r#   rG   r$   r   {  s   &  
r   c                
       s\   e Zd Z fddZd	ejeeef eej	 ee
 ee
 ee
 eeef dddZ  ZS )
Swin2SREncoderc                    sj   t    t j| _ | _dd td jt	 jD t
 fddt| jD | _d| _d S )Nc                 S   s   g | ]}|  qS r#   )r   )r   r   r#   r#   r$   r     s     z+Swin2SREncoder.__init__.<locals>.<listcomp>r   c                    sd   g | ]\}t   jd  d f j|  j| t jd| t jd|d   d dqS )r   r   N)r^   rv   ru   r   r   rA   r   )r   rY   depthsr   r   )r   Z	stage_idxr^   Zdpr	grid_sizer#   r$   r     s   
*F)rD   rE   r   r   Z
num_stagesr^   r    Zlinspacer   r   r   r   r   stagesgradient_checkpointing)rF   r^   r   rG   r   r$   rE     s    
 
zSwin2SREncoder.__init__NFT)r   r   r   r   output_hidden_statesreturn_dictr;   c                    s  d}|rdnd } rdnd }	|r*||f7 }t | jD ]\}
}|d k	rL||
 nd }| jr| jr fdd}tjj|||||}n|||| }|d }|d }|d |d f}||f7 }|r||f7 } r4|	|dd  7 }	q4|std	d
 |||	fD S t|||	dS )Nr#   c                    s    fdd}|S )Nc                     s    | f S rC   r#   )inputs)moduler   r#   r$   custom_forward  s    zMSwin2SREncoder.forward.<locals>.create_custom_forward.<locals>.custom_forwardr#   )r   r   r   )r   r$   create_custom_forward  s    z5Swin2SREncoder.forward.<locals>.create_custom_forwardr   r   r   r(   r%   c                 s   s   | ]}|d k	r|V  qd S rC   r#   )r   vr#   r#   r$   	<genexpr>  s      z)Swin2SREncoder.forward.<locals>.<genexpr>r   r   r   )	r   r   r   r:   r    utils
checkpointtupler   )rF   r   r   r   r   r   r   Zall_input_dimensionsZall_hidden_statesZall_self_attentionsr   Zstage_moduler   r   r   ra   r#   r   r$   rK     s>    	
   

zSwin2SREncoder.forward)NFFT)r   r   r   rE   r    rP   r   rq   r   r!   r   r   r   rK   rR   r#   r#   rG   r$   r     s       

r   c                   @   s2   e Zd ZdZeZdZdZdZdd Z	ddd	Z
d
S )Swin2SRPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    swin2srr_   Tc                 C   sn   t |tjtjfrDtjjj|jj| j	j
d |jdk	rj|jj  n&t |tjrj|jj  |jjd dS )zInitialize the weights)ZstdNr   )rd   r   rz   ri   r    initZtrunc_normal_weightdatar^   Zinitializer_rangery   Zzero_rk   Zfill_)rF   r   r#   r#   r$   _init_weights  s    
z$Swin2SRPreTrainedModel._init_weightsFc                 C   s   t |tr||_d S rC   )rd   r   r   )rF   r   r   r#   r#   r$   _set_gradient_checkpointing%  s    
z2Swin2SRPreTrainedModel._set_gradient_checkpointingN)F)r   r   r   r   r   config_classZbase_model_prefixZmain_input_nameZsupports_gradient_checkpointingr  r  r#   r#   r#   r$   r    s   
r  aJ  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`Swin2SRConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
aN  
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`Swin2SRImageProcessor.__call__`] for details.
        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zaThe bare Swin2SR Model transformer outputting raw hidden-states without any specific head on top.c                       s   e Zd Z fddZdd Zdd Zdd Zeee	e
eed	ed
dejeej ee ee ee eeef dddZ  ZS )Swin2SRModelc                    s   t  | || _|jdkr:d}t|dddd| _ntdddd| _|j	| _	t
|j|jddd| _t|| _t|| jjjd| _t
j|j|jd| _t|| _t
|j|jddd| _|   d S )Nr   )gw#?g8EGr?gB`"?r   )r   r   )rD   rE   r^   r3   r    rP   r+   meanrX   	img_ranger   ri   rY   first_convolutionrS   r`   r   rU   rh   encoderrk   r   rl   rr   r   conv_after_body	post_init)rF   r^   Zrgb_meanrG   r#   r$   rE   P  s    


zSwin2SRModel.__init__c                 C   s   | j jS rC   )r`   rU   rM   r#   r#   r$   get_input_embeddingsf  s    z!Swin2SRModel.get_input_embeddingsc                 C   s*   |  D ]\}}| jj| j| qdS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  layerr   r   )rF   Zheads_to_pruner  r   r#   r#   r$   _prune_headsi  s    zSwin2SRModel._prune_headsc                 C   sr   |  \}}}}| jj}|||  | }|||  | }tj|d|d|fd}| j|| _|| j | j }|S )Nr   Zreflect)	r   r^   r/   r   r}   r~   r  Ztype_asr  )rF   r_   rp   r1   r2   r/   Zmodulo_pad_heightZmodulo_pad_widthr#   r#   r$   pad_and_normalizeq  s    zSwin2SRModel.pad_and_normalizeZvision)r   output_typer	  ZmodalityZexpected_outputN)r_   r   r   r   r   r;   c                 C   s   |d k	r|n| j j}|d k	r |n| j j}|d k	r4|n| j j}| |t| j j}|j\}}}}| |}| 	|}	| 
|	\}
}| j|
|||||d}|d }| |}| |||f}| ||	 }|s|f|dd   }|S t||j|jdS )Nr   r   r   r   r   r   r   )r^   r   r   use_return_dictZget_head_maskr   r   r*   r  r  r`   r  rl   r   r  r	   r   r   )rF   r_   r   r   r   r   rp   r1   r2   r`   Zembedding_outputr   Zencoder_outputssequence_outputr@   r#   r#   r$   rK     s:    

	
zSwin2SRModel.forward)NNNN)r   r   r   rE   r  r  r  r   SWIN2SR_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr	   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr    r!   r   r   r   r   rK   rR   r#   r#   rG   r$   r
  K  s0   
    
r
  c                       s(   e Zd ZdZ fddZdd Z  ZS )UpsamplezUpsample module.

    Args:
        scale (`int`):
            Scale factor. Supported scales: 2^n and 3.
        num_features (`int`):
            Channel number of intermediate features.
    c                    s   t    || _||d @ dkrxttt|dD ]@}| d| t	|d| ddd | d| t
d q4n>|dkrt	|d| ddd| _t
d| _ntd	| d
d S )Nr   r   r%   convolution_r&   r   pixelshuffle_	   zScale z/ is not supported. Supported scales: 2^n and 3.)rD   rE   scaler   rq   r   r   Z
add_moduler   ri   PixelShuffleconvolutionpixelshuffler   )rF   r"  num_featuresr   rG   r#   r$   rE     s    
$zUpsample.__init__c                 C   s|   | j | j d @ dkrZttt| j dD ],}| d| |}| d| |}q*n| j dkrx| |}| |}|S )Nr   r   r%   r  r   r   )r"  r   rq   r   r   __getattr__r$  r%  )rF   Zhidden_stater   r#   r#   r$   rK     s    


zUpsample.forwardrs   r#   r#   rG   r$   r    s   	r  c                       s(   e Zd ZdZ fddZdd Z  ZS )UpsampleOneStepaB  UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle)

    Used in lightweight SR to save parameters.

    Args:
        scale (int):
            Scale factor. Supported scales: 2^n and 3.
        in_channels (int):
            Channel number of intermediate features.
    c                    s6   t    t||d | ddd| _t|| _d S )Nr%   r   r   )rD   rE   r   ri   r   r#  pixel_shuffle)rF   r"  Zin_channelsZout_channelsrG   r#   r$   rE     s    
zUpsampleOneStep.__init__c                 C   s   |  |}| |}|S rC   )r   r)  )rF   r   r#   r#   r$   rK     s    

zUpsampleOneStep.forwardrs   r#   r#   rG   r$   r(    s   r(  c                       s$   e Zd Z fddZdd Z  ZS )PixelShuffleUpsamplerc                    sV   t    t|j|ddd| _tjdd| _t|j	|| _
t||jddd| _d S Nr   r   Tr   )rD   rE   r   ri   rY   conv_before_upsampler   
activationr  upscaleupsampler3   final_convolutionrF   r^   r&  rG   r#   r$   rE      s
    
zPixelShuffleUpsampler.__init__c                 C   s,   |  |}| |}| |}| |}|S rC   )r,  r-  r/  r0  )rF   r  r   r#   r#   r$   rK     s
    



zPixelShuffleUpsampler.forwardr   r   r   rE   rK   rR   r#   r#   rG   r$   r*    s   r*  c                       s$   e Zd Z fddZdd Z  ZS )NearestConvUpsamplerc                    s   t    |jdkrtdt|j|ddd| _tjdd| _	t||ddd| _
t||ddd| _t||ddd| _t||jddd| _tjddd| _d S )	Nr&   zNThe nearest+conv upsampler only supports an upscale factor of 4 at the moment.r   r   Tr   r   r   )rD   rE   r.  r   r   ri   rY   r,  r   r-  conv_up1conv_up2conv_hrr3   r0  lrelur1  rG   r#   r$   rE     s    

zNearestConvUpsampler.__init__c              	   C   sn   |  |}| |}| | tjjj|ddd}| | tjjj|ddd}| 	| | 
|}|S )Nr%   Znearest)Zscale_factormode)r,  r-  r7  r4  r    r   r}   interpolater5  r0  r6  )rF   r  reconstructionr#   r#   r$   rK     s    

zNearestConvUpsampler.forwardr2  r#   r#   rG   r$   r3    s   r3  c                       s$   e Zd Z fddZdd Z  ZS )PixelShuffleAuxUpsamplerc              	      s   t    |j| _t|j|ddd| _t|j|ddd| _tj	dd| _
t||jddd| _ttd|dddtj	dd| _t|j|| _t||jddd| _d S r+  )rD   rE   r.  r   ri   r3   conv_bicubicrY   r,  r   r-  conv_auxr   conv_after_auxr  r/  r0  r1  rG   r#   r$   rE   ,  s    
$z!PixelShuffleAuxUpsampler.__init__c                 C   s   |  |}| |}| |}| |}| |}| |d d d d d || j d || j f |d d d d d || j d || j f  }| |}||fS rC   )r<  r,  r-  r=  r>  r/  r.  r0  )rF   r  bicubicr1   r2   auxr:  r#   r#   r$   rK   8  s    




0*
z PixelShuffleAuxUpsampler.forwardr2  r#   r#   rG   r$   r;  +  s   r;  zm
    Swin2SR Model transformer with an upsampler head on top for image super resolution and restoration.
    c                       sr   e Zd Z fddZeeeeedde	e
j e	e
j e	e
j e	e e	e e	e eeef dddZ  ZS )	Swin2SRForImageSuperResolutionc                    s   t  | t|| _|j| _|j| _d}| jdkrBt||| _nh| jdkrZt||| _nP| jdkrzt	|j|j
|j| _n0| jdkrt||| _nt|j
|jddd| _|   d S )N@   r%  pixelshuffle_auxpixelshuffledirectnearest+convr   r   )rD   rE   r
  r  	upsamplerr.  r*  r/  r;  r(  rY   r3   r3  r   ri   r0  r  r1  rG   r#   r$   rE   N  s    




z'Swin2SRForImageSuperResolution.__init__)r  r	  N)r_   r   labelsr   r   r   r;   c                 C   sd  |dk	r|n| j j}|jdd \}}| j jdkrVtjj||| j || j fddd}	| j|||||d}
|
d }| jd	kr| 	|}nB| jdkr| 	||	||\}}|| jj
 | jj }n|| | }|| jj
 | jj }|ddddd|| j d|| j f }d}|dk	r td
|sP|f|
dd  }|dk	rL|f| S |S t|||
j|
jdS )a  
        Returns:

        Example:
         ```python
         >>> import torch
         >>> import numpy as np
         >>> from PIL import Image
         >>> import requests

         >>> from transformers import AutoImageProcessor, Swin2SRForImageSuperResolution

         >>> processor = AutoImageProcessor.from_pretrained("caidas/swin2SR-classical-sr-x2-64")
         >>> model = Swin2SRForImageSuperResolution.from_pretrained("caidas/swin2SR-classical-sr-x2-64")

         >>> url = "https://huggingface.co/spaces/jjourney1125/swin2sr/resolve/main/samples/butterfly.jpg"
         >>> image = Image.open(requests.get(url, stream=True).raw)
         >>> # prepare image for the model
         >>> inputs = processor(image, return_tensors="pt")

         >>> # forward pass
         >>> with torch.no_grad():
         ...     outputs = model(**inputs)

         >>> output = outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
         >>> output = np.moveaxis(output, source=0, destination=-1)
         >>> output = (output * 255.0).round().astype(np.uint8)  # float32 to uint8
         >>> # you can visualize `output` with `Image.fromarray`
         ```Nr%   rC  r?  F)r   r8  Zalign_cornersr  r   )r%  rD  rE  z'Training is not supported at the momentr   )lossr:  r   r   )r^   r  r*   rF  r   r}   r9  r.  r  r/  r  r  r0  NotImplementedErrorr
   r   r   )rF   r_   r   rG  r   r   r   r1   r2   r?  r   r  r:  r@  rH  r@   r#   r#   r$   rK   h  sJ    (

,
z&Swin2SRForImageSuperResolution.forward)NNNNNN)r   r   r   rE   r   r  r   r
   r  r   r    r!   Z
LongTensorr   r   r   rK   rR   r#   r#   rG   r$   rA  G  s$   
      
rA  )r7   F)Gr   collections.abcre   r   dataclassesr   typingr   r   r   r    Ztorch.utils.checkpointr   Zactivationsr   Zmodeling_outputsr	   r
   Zmodeling_utilsr   Zpytorch_utilsr   r   r   r   r   r   r   r   r   r   Zconfiguration_swin2srr   Z
get_loggerr   loggerr  r  r  Z%SWIN2SR_PRETRAINED_MODEL_ARCHIVE_LISTr   r5   r6   rP   rO   r   rA   r   rB   rS   rT   rr   rt   r   r   r   r   r   r   r   r   r  ZSWIN2SR_START_DOCSTRINGr  r
  r  r(  r*  r3  r;  rA  r#   r#   r#   r$   <module>   sr    
7 / GNq&