U
    ,-ea                     @   s\  d Z ddlZddlmZ ddlmZmZmZmZ ddl	Z	ddl
Z	ddl	mZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZmZmZ ddlmZmZmZmZmZ ddl m!Z! e"e#Z$dZ%dZ&ddgZ'dZ(dZ)eG dd deZ*G dd dej+Z,G dd dej+Z-G dd dej+Z.G dd dej+Z/G dd dej+Z0G d d! d!ej+Z1G d"d# d#ej+Z2G d$d% d%ej+Z3G d&d' d'ej+Z4G d(d) d)ej+Z5G d*d+ d+ej+Z6G d,d- d-ej+Z7G d.d/ d/ej+Z8G d0d1 d1eZ9ed2e(G d3d4 d4e9Z:ed5e(G d6d7 d7e9Z;ed8e(G d9d: d:e9Z<ed;e(G d<d= d=e9Z=dS )>z PyTorch Bros model.    N)	dataclass)ListOptionalTupleUnion)nn)CrossEntropyLoss   )ACT2FN))BaseModelOutputWithPastAndCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentionsTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)ModelOutputadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )
BrosConfigzjinho8345/bros-base-uncasedr   zjinho8345/bros-large-uncasedaK  
    This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
    and behavior.

    Parameters:
        config ([`BrosConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`BrosProcessor`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)

        bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.

        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)

        bbox_first_token_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)

        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)

        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.

        return_dict (`bool`, *optional*):
            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
c                   @   sl   e Zd ZU dZdZeej ed< dZ	ejed< dZ
ejed< dZeeej  ed< dZeeej  ed< dS )BrosSpadeOutputa  
    Base class for outputs of token classification models.

    Args:
        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided) :
            Classification loss.
        initial_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.num_labels)`):
            Classification scores for entity initial tokens (before SoftMax).
        subsequent_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, sequence_length+1)`):
            Classification scores for entity sequence tokens (before SoftMax).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nlossinitial_token_logitssubsequent_token_logitshidden_states
attentions)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r   r   r    r&   r&   g/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/transformers/models/bros/modeling_bros.pyr      s   
r   c                       s0   e Zd Z fddZejejdddZ  ZS )BrosPositionalEmbedding1Dc                    sD   t t|   |j| _ddtd| jd| j   }| d| d S )Nr   i'          g       @inv_freq)superr(   __init__dim_bbox_sinusoid_emb_1dr#   arangeregister_buffer)selfconfigr*   	__class__r&   r'   r,      s    z"BrosPositionalEmbedding1D.__init__)pos_seqreturnc                 C   sX   |  }|\}}}||||d| jddd| jd  }tj| | gdd}|S )Nr      dim)sizeviewr*   r-   r#   catsincos)r0   r4   Zseq_sizeb1b2Zb3Zsinusoid_inpZpos_embr&   r&   r'   forward   s
    
(z!BrosPositionalEmbedding1D.forwardr   r    r!   r,   r#   TensorrA   __classcell__r&   r&   r2   r'   r(      s   
r(   c                       s0   e Zd Z fddZejejdddZ  ZS )BrosPositionalEmbedding2Dc                    s.   t t|   |j| _t|| _t|| _d S N)r+   rE   r,   dim_bboxr(   	x_pos_emb	y_pos_embr0   r1   r2   r&   r'   r,      s    
z"BrosPositionalEmbedding2D.__init__)bboxr5   c                 C   sd   g }t | jD ]B}|d dkr8|| |d|f  q|| |d|f  qtj|dd}|S )Nr6   r   .r7   r8   )rangerG   appendrH   rI   r#   r<   )r0   rK   stackibbox_pos_embr&   r&   r'   rA      s    z!BrosPositionalEmbedding2D.forwardrB   r&   r&   r2   r'   rE      s   rE   c                       s,   e Zd Z fddZejdddZ  ZS )BrosBboxEmbeddingsc                    s2   t t|   t|| _tj|j|jdd| _	d S )NF)bias)
r+   rQ   r,   rE   bbox_sinusoid_embr   LinearZdim_bbox_sinusoid_emb_2dZdim_bbox_projectionbbox_projectionrJ   r2   r&   r'   r,      s    
zBrosBboxEmbeddings.__init__)rK   c                 C   s\   | dd}|d d d d d d d f |d d d d d d d f  }| |}| |}|S )Nr   r   )	transposerS   rU   )r0   rK   Zbbox_tZbbox_posrP   r&   r&   r'   rA      s
    8

zBrosBboxEmbeddings.forwardrB   r&   r&   r2   r'   rQ      s   rQ   c                       sT   e Zd ZdZ fddZd	eej eej eej eej eejdddZ	  Z
S )
BrosTextEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                    s   t    tj|j|j|jd| _t|j|j| _	t|j
|j| _tj|j|jd| _t|j| _t|dd| _| dt|jd | jdtj| j tj| jjdd	d
 d S )N)padding_idxZepsposition_embedding_typeabsoluteposition_ids)r   r7   token_type_idsdtypedeviceF)
persistent)r+   r,   r   	EmbeddingZ
vocab_sizehidden_sizeZpad_token_idword_embeddingsmax_position_embeddingsposition_embeddingsZtype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutgetattrrZ   r/   r#   r.   expandzerosr\   r:   longr`   rJ   r2   r&   r'   r,      s"    
zBrosTextEmbeddings.__init__Nr   )	input_idsr]   r\   inputs_embedspast_key_values_lengthr5   c                 C   s   |d k	r|  }n|  d d }|d }|d krL| jd d ||| f }|d krt| dr| jd d d |f }||d |}	|	}ntj|tj| jjd}|d kr| 	|}| 
|}
||
 }| jdkr| |}||7 }| |}| |}|S )Nr7   r   r]   r   r^   r[   )r:   r\   hasattrr]   rn   r#   ro   rp   r`   rd   rg   rZ   rf   rh   rl   )r0   rq   r]   r\   rr   rs   input_shape
seq_lengthbuffered_token_type_ids buffered_token_type_ids_expandedrg   
embeddingsrf   r&   r&   r'   rA      s,    







zBrosTextEmbeddings.forward)NNNNr   )r   r    r!   r"   r,   r   r#   rC   intrA   rD   r&   r&   r2   r'   rW      s        rW   c                       s   e Zd Z fddZejdddZdejejeej eej eej eej eeeej	   eej eej d	d	d
Z
  ZS )BrosSelfAttentionc                    s   t    |j|j dkr>t|ds>td|j d|j d|j| _t|j|j | _| j| j | _t	
|j| j| _t	
|j| j| _t	
|j| j| _t	|j| _t|dd| _| jdks| jd	kr|j| _t	d
|j d | j| _|j| _d S )Nr   Zembedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()rZ   r[   relative_keyrelative_key_queryr6   r   )r+   r,   rc   num_attention_headsrt   
ValueErrorrz   attention_head_sizeall_head_sizer   rT   querykeyvaluerj   Zattention_probs_dropout_probrl   rm   rZ   re   rb   distance_embedding
is_decoderrJ   r2   r&   r'   r,   !  s"    
zBrosSelfAttention.__init__)xc                 C   s6   |  d d | j| jf }|j| }|ddddS )Nr7   r   r6   r   r	   )r:   r   r   r;   permute)r0   r   Znew_x_shaper&   r&   r'   transpose_for_scores9  s    
z&BrosSelfAttention.transpose_for_scoresNF	r   rP   attention_mask	head_maskencoder_hidden_statesencoder_attention_maskpast_key_valueoutput_attentionsr5   c	                 C   s  |  |}	|d k	}
|
r4|d k	r4|d }|d }|}n|
r^| | |}| | |}|}nv|d k	r| | |}| | |}tj|d |gdd}tj|d |gdd}n | | |}| | |}| |	}| jr||f}t||dd}| j	dks| j	dkr|
 d }tj|tj|jd	dd}tj|tj|jd	dd}|| }| || j d }|j|jd
}| j	dkrtd||}|| }n4| j	dkrtd||}td||}|| | }|j\}}}}|||||}|ddddg}td||f}|| }|t| j }|d k	rH|| }tjdd|}| |}|d k	rt|| }t||}|dddd }|
 d d | jf }|j| }|r||fn|f}| jr||f }|S )Nr   r   r6   r8   r7   r}   r~   r^   r_   zbhld,lrd->bhlrzbhrd,lrd->bhlrr	   zbnid,bijd->bnij)r   r   r   r   r#   r<   r   matmulrV   rZ   r:   r.   rp   r`   r;   r   re   tor_   Zeinsumshaper   mathsqrtr   r   ZSoftmaxrl   
contiguousr   )r0   r   rP   r   r   r   r   r   r   Zmixed_query_layerZis_cross_attention	key_layerZvalue_layerquery_layerZattention_scoresrv   Zposition_ids_lZposition_ids_rZdistanceZpositional_embeddingZrelative_position_scoresZrelative_position_scores_queryZrelative_position_scores_key
batch_sizeZn_headZd_headZbbox_pos_scoresZattention_probsZcontext_layerZnew_context_layer_shapeoutputsr&   r&   r'   rA   A  sn    







zBrosSelfAttention.forward)NNNNNF)r   r    r!   r,   r#   rC   r   r   r   r$   rA   rD   r&   r&   r2   r'   r{      s&         r{   c                       s4   e Zd Z fddZejejejdddZ  ZS )BrosSelfOutputc                    sB   t    t|j|j| _tj|j|jd| _t|j	| _
d S NrY   )r+   r,   r   rT   rc   denserh   ri   rj   rk   rl   rJ   r2   r&   r'   r,     s    
zBrosSelfOutput.__init__r   input_tensorr5   c                 C   s&   |  |}| |}| || }|S rF   r   rl   rh   r0   r   r   r&   r&   r'   rA     s    

zBrosSelfOutput.forwardrB   r&   r&   r2   r'   r     s   r   c                       sx   e Zd Z fddZdd Zd
ejejeej eej eej eej eeeej	   ee
 eej d	dd	Z  ZS )BrosAttentionc                    s*   t    t|| _t|| _t | _d S rF   )r+   r,   r{   r0   r   outputsetpruned_headsrJ   r2   r&   r'   r,     s    


zBrosAttention.__init__c                 C   s   t |dkrd S t|| jj| jj| j\}}t| jj|| j_t| jj|| j_t| jj	|| j_	t| j
j|dd| j
_| jjt | | j_| jj| jj | j_| j|| _d S )Nr   r   r8   )lenr   r0   r   r   r   r   r   r   r   r   r   r   union)r0   headsindexr&   r&   r'   prune_heads  s    zBrosAttention.prune_headsNFr   c	              
   C   s@   | j ||||||||d}	| |	d |}
|
f|	dd   }|S )Nr   rP   r   r   r   r   r   r   r   r   )r0   r   )r0   r   rP   r   r   r   r   r   r   Zself_outputsattention_outputr   r&   r&   r'   rA     s    
zBrosAttention.forward)NNNNNF)r   r    r!   r,   r   r#   rC   r   r   r$   boolrA   rD   r&   r&   r2   r'   r     s&         r   c                       s0   e Zd Z fddZejejdddZ  ZS )BrosIntermediatec                    sB   t    t|j|j| _t|jt	r6t
|j | _n|j| _d S rF   )r+   r,   r   rT   rc   intermediate_sizer   
isinstanceZ
hidden_actstrr
   intermediate_act_fnrJ   r2   r&   r'   r,     s
    
zBrosIntermediate.__init__r   r5   c                 C   s   |  |}| |}|S rF   )r   r   )r0   r   r&   r&   r'   rA     s    

zBrosIntermediate.forwardrB   r&   r&   r2   r'   r     s   r   c                       s4   e Zd Z fddZejejejdddZ  ZS )
BrosOutputc                    sB   t    t|j|j| _tj|j|jd| _t	|j
| _d S r   )r+   r,   r   rT   r   rc   r   rh   ri   rj   rk   rl   rJ   r2   r&   r'   r,      s    
zBrosOutput.__init__r   c                 C   s&   |  |}| |}| || }|S rF   r   r   r&   r&   r'   rA     s    

zBrosOutput.forwardrB   r&   r&   r2   r'   r     s   r   c                       sx   e Zd Z fddZd
ejejeej eej eej eej eeeej   ee	 eej d	ddZ
dd	 Z  ZS )	BrosLayerc                    sn   t    |j| _d| _t|| _|j| _|j| _| jrV| jsLt|  dt|| _	t
|| _t|| _d S )Nr   z> should be used as a decoder model if cross attention is added)r+   r,   chunk_size_feed_forwardseq_len_dimr   	attentionr   add_cross_attention	Exceptioncrossattentionr   intermediater   r   rJ   r2   r&   r'   r,     s    



zBrosLayer.__init__NFr   c	              	   C   s  |d k	r|d d nd }	| j ||||||	d}
|
d }| jrR|
dd }|
d }n|
dd  }d }| jr|d k	rt| drtd|  d|d k	r|d	d  nd }| |||||||}|d }||dd  }|d }|| }t| j| j| j|}|f| }| jr||f }|S )
Nr6   )rP   r   r   r   r   r   r   r7   r   z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r   )	r   r   rt   r   r   r   feed_forward_chunkr   r   )r0   r   rP   r   r   r   r   r   r   Zself_attn_past_key_valueZself_attention_outputsr   r   Zpresent_key_valueZcross_attn_present_key_valueZcross_attn_past_key_valueZcross_attention_outputslayer_outputr&   r&   r'   rA     sX    


	

zBrosLayer.forwardc                 C   s   |  |}| ||}|S rF   )r   r   )r0   r   Zintermediate_outputr   r&   r&   r'   r   a  s    
zBrosLayer.feed_forward_chunk)NNNNNF)r   r    r!   r,   r#   rC   r   r$   r   r   rA   r   rD   r&   r&   r2   r'   r     s&         Er   c                       s   e Zd Z fddZd	ejejeej eej eej eej eeeej   ee	 ee	 ee	 ee	 e
eej ef dddZ  ZS )
BrosEncoderc                    s4   t     | _t fddt jD | _d S )Nc                    s   g | ]}t  qS r&   )r   ).0_r1   r&   r'   
<listcomp>k  s     z(BrosEncoder.__init__.<locals>.<listcomp>)r+   r,   r1   r   Z
ModuleListrL   num_hidden_layerslayerrJ   r2   r   r'   r,   h  s    
zBrosEncoder.__init__NFT)r   rP   r   r   r   r   past_key_values	use_cacher   output_hidden_statesreturn_dictr5   c                    st  |
rdnd } rdnd } r(| j jr(dnd }|r4dnd }t| jD ]\}}|
rX||f }|d k	rh|| nd }|d k	r||| nd }t| j ddr| jr|rtd d} fdd}tj	j

||||||||}n|||||||| d}|d }|r||d	 f7 } rB||d
 f }| j jrB||d f }qB|
r@||f }|sbtdd |||||fD S t|||||dS )Nr&   Zgradient_checkpointingFzh`use_cache=True` is incompatible with `config.gradient_checkpointing=True`. Setting `use_cache=False`...c                    s    fdd}|S )Nc                     s    | f S rF   r&   )inputs)moduler   r&   r'   custom_forward  s    zJBrosEncoder.forward.<locals>.create_custom_forward.<locals>.custom_forwardr&   )r   r   r   )r   r'   create_custom_forward  s    z2BrosEncoder.forward.<locals>.create_custom_forwardr   r   r7   r   r6   c                 s   s   | ]}|d k	r|V  qd S rF   r&   )r   vr&   r&   r'   	<genexpr>  s   z&BrosEncoder.forward.<locals>.<genexpr>)last_hidden_stater   r   r   cross_attentions)r1   r   	enumerater   rm   Ztrainingloggerwarningr#   utils
checkpointtupler   )r0   r   rP   r   r   r   r   r   r   r   r   r   Zall_hidden_statesZall_self_attentionsZall_cross_attentionsZnext_decoder_cacherO   Zlayer_moduleZlayer_head_maskr   r   Zlayer_outputsr&   r   r'   rA   m  sx    


zBrosEncoder.forward)	NNNNNNFFT)r   r    r!   r,   r#   rC   r   r$   r   r   r   r   rA   rD   r&   r&   r2   r'   r   g  s0   	         r   c                       s0   e Zd Z fddZejejdddZ  ZS )
BrosPoolerc                    s*   t    t|j|j| _t | _d S rF   )r+   r,   r   rT   rc   r   ZTanh
activationrJ   r2   r&   r'   r,     s    
zBrosPooler.__init__r   c                 C   s(   |d d df }|  |}| |}|S )Nr   )r   r   )r0   r   Zfirst_token_tensorpooled_outputr&   r&   r'   rA     s    

zBrosPooler.forwardrB   r&   r&   r2   r'   r     s   r   c                       s0   e Zd Z fddZejejdddZ  ZS )BrosRelationExtractorc                    s   t    |j| _|j| _|j| _|j| _t| j| _	t
| j| j| j | _t
| j| j| j | _ttd| j| _d S )Nr   )r+   r,   n_relationsrc   backbone_hidden_sizehead_hidden_sizeZclassifier_dropout_probr   rj   droprT   r   r   	Parameterr#   ro   
dummy_noderJ   r2   r&   r'   r,     s    
zBrosRelationExtractor.__init__)r   r   c              	   C   s   |  | |}| jdd|dd}tj||gdd}| | |}|	|d|d| j
| j}|	|d|d| j
| j}t|dddd|dddd}|S )Nr   r   Zaxisr6   r	   )r   r   r   Z	unsqueezerepeatr:   r#   r<   r   r;   r   r   r   r   )r0   r   r   Z	dummy_vecZrelation_scorer&   r&   r'   rA     s          zBrosRelationExtractor.forwardrB   r&   r&   r2   r'   r     s   r   c                   @   s    e Zd ZdZeZdZdd ZdS )BrosPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    brosc                 C   s   t |tjr:|jjjd| jjd |jdk	r|jj	  nft |tj
rz|jjjd| jjd |jdk	r|jj|j 	  n&t |tjr|jj	  |jjd dS )zInitialize the weightsr)   )ZmeanZstdNg      ?)r   r   rT   weightdataZnormal_r1   Zinitializer_rangerR   Zzero_rb   rX   rh   Zfill_)r0   r   r&   r&   r'   _init_weights  s    

z!BrosPreTrainedModel._init_weightsN)r   r    r!   r"   r   config_classZbase_model_prefixr   r&   r&   r&   r'   r     s   r   z^The bare Bros Model transformer outputting raw hidden-states without any specific head on top.c                       s   e Zd Zd fdd	Zdd Zdd Zdd	 Zee	d
e
eeddeej eej eej eej eej eej eej eej eej eeej  ee ee ee ee eeej ef dddZ  ZS )	BrosModelTc                    sN   t  | || _t|| _t|| _t|| _|r<t	|nd | _
|   d S rF   )r+   r,   r1   rW   ry   rQ   bbox_embeddingsr   encoderr   poolerinit_weights)r0   r1   Zadd_pooling_layerr2   r&   r'   r,     s    


zBrosModel.__init__c                 C   s   | j jS rF   ry   rd   )r0   r&   r&   r'   get_input_embeddings'  s    zBrosModel.get_input_embeddingsc                 C   s   || j _d S rF   r   )r0   r   r&   r&   r'   set_input_embeddings*  s    zBrosModel.set_input_embeddingsc                 C   s*   |  D ]\}}| jj| j| qdS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr   r   r   r   )r0   Zheads_to_pruner   r   r&   r&   r'   _prune_heads-  s    zBrosModel._prune_headsbatch_size, sequence_lengthoutput_typer   N)rq   rK   r   r]   r\   r   rr   r   r   r   r   r   r   r   r5   c           "      C   s  |dk	r|n| j j}|dk	r |n| j j}|dk	r4|n| j j}| j jrZ|dk	rP|n| j j}nd}|dk	rx|dk	rxtdn4|dk	r| }n"|dk	r| dd }ntd|dkrtd|\}}|dk	r|jn|j}|
dk	r|
d d j	d nd}|dkrt
j||d	}|dkrbt| jd
rP| jjddd|f }|||}|}nt
j|t
j|d}| |||}| j jr|dk	r| \}}}||f}|	dkrt
j||d	}	| |	}nd}| || j j}| j|||||d}|j	d dkr|ddddddddddddgf }|| j j }| |}| j|||||||
||||d}|d } | jdk	rp| | nd}!|s| |!f|dd  S t| |!|j|j|j|jdS )a  
        Returns:

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosModel

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosModel.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        ```NFzDYou cannot specify both input_ids and inputs_embeds at the same timer7   z5You have to specify either input_ids or inputs_embedszYou have to specify bboxr   r6   )r`   r]   r^   )rq   r\   r]   rr   rs      r   r	   )
rP   r   r   r   r   r   r   r   r   r   )r   Zpooler_outputr   r   r   r   )r1   r   r   use_return_dictr   r   r   r:   r`   r   r#   Zonesrt   ry   r]   rn   ro   rp   Zget_extended_attention_maskZinvert_attention_maskZget_head_maskr   Z
bbox_scaler   r   r   r   r   r   r   r   )"r0   rq   rK   r   r]   r\   r   rr   r   r   r   r   r   r   r   ru   r   rv   r`   rs   rw   rx   Zextended_attention_maskZencoder_batch_sizeZencoder_sequence_lengthr   Zencoder_hidden_shapeZencoder_extended_attention_maskZembedding_outputZscaled_bboxZbbox_position_embeddingsZencoder_outputssequence_outputr   r&   r&   r'   rA   5  s    '




	&
zBrosModel.forward)T)NNNNNNNNNNNNNN)r   r    r!   r,   r   r   r   r   BROS_INPUTS_DOCSTRINGformatr   r   _CONFIG_FOR_DOCr   r#   rC   r   r$   r   r   r   rA   rD   r&   r&   r2   r'   r     sJ   
              r   z
    Bros Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    Named-Entity-Recognition (NER) tasks.
    c                       s   e Zd ZdgZ fddZeedee	e
dd
eej eej eej eej eej eej eej eej eej ee ee ee eeej e	f ddd	Z  ZS )BrosForTokenClassificationr   c                    s^   t  | |j| _t|| _t|dr.|jn|j}t	|| _
t|j|j| _|   d S Nclassifier_dropout)r+   r,   
num_labelsr   r   rt   r  rk   r   rj   rl   rT   rc   
classifierr   r0   r1   r  r2   r&   r'   r,     s    
z#BrosForTokenClassification.__init__r   r   Nrq   rK   r   bbox_first_token_maskr]   r\   r   rr   labelsr   r   r   r5   c                 C   s   |dk	r|n| j j}| j||||||||
||d
}|d }| |}| |}d}|	dk	rt }|dk	r|d}||d| j| |	d| }n||d| j|	d}|s|f|dd  }|dk	r|f| S |S t|||j	|j
dS )ax  

        Returns:

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```N)	rK   r   r]   r\   r   rr   r   r   r   r   r7   r6   r   logitsr   r   )r1   r   r   rl   r  r   r;   r  r   r   r   )r0   rq   rK   r   r  r]   r\   r   rr   r	  r   r   r   r   r   r  r   loss_fctr   r&   r&   r'   rA     sF    &


 z"BrosForTokenClassification.forward)NNNNNNNNNNNNr   r    r!   "_keys_to_ignore_on_load_unexpectedr,   r   r   r   r   r   r   r   r#   rC   r   r   r   rA   rD   r&   r&   r2   r'   r    s>   
            r  a  
    Bros Model with a token classification head on top (initial_token_layers and subsequent_token_layer on top of the
    hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. The initial_token_classifier is used to
    predict the first token of each entity, and the subsequent_token_classifier is used to predict the subsequent
    tokens within an entity. Compared to BrosForTokenClassification, this model is more robust to serialization errors
    since it predicts next token from one token.
    c                       s   e Zd ZdgZ fddZeedee	e
dd
eej eej eej eej eej eej eej eej eej eej ee ee ee eeej e	f ddd	Z  ZS )!BrosSpadeEEForTokenClassificationr   c              	      s   t  | || _|j| _|j| _|j| _t|| _t	|drD|j
n|j}tt|t|j|jt|t|j|j| _t|| _|   d S r  )r+   r,   r1   r  r   rc   r   r   r   rt   r  rk   r   Z
Sequentialrj   rT   initial_token_classifierr   subsequent_token_classifierr   r  r2   r&   r'   r,   :  s     

z*BrosSpadeEEForTokenClassification.__init__r   r   N)rq   rK   r   r  r]   r\   r   rr   initial_token_labelssubsequent_token_labelsr   r   r   r5   c                 C   s  |dk	r|n| j j}| j||||||||||d
}|d }|dd }| |dd }| ||d}d| }|j\}}|j	}t
j|t
|dg|gdd }||dddddf t
|jj}t
||d | }||dddddf t
|jj}|d }d}|	dk	r|
dk	rt }|	d}	|dk	r|d}||d| j| |	| }n||d| j|	}|
d}
||d|d | |
| }|| }|s||f|dd  }|dk	r|f| S |S t||||j|jdS )	a  
        Returns:

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeEEForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeEEForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```N
rq   rK   r   r]   r\   r   rr   r   r   r   r   r   r   r7   r6   )r   r   r   r   r   )r1   r   r   rV   r   r  r  squeezer   r`   r#   r<   ro   r   r   masked_fillfinfor_   mineyer;   r   r  r   r   r   )r0   rq   rK   r   r  r]   r\   r   rr   r  r  r   r   r   r   last_hidden_statesr   r   Zinv_attention_maskr   max_seq_lengthr`   Zinvalid_token_maskself_token_maskZsubsequent_token_maskr   r  Zinitial_token_lossZsubsequent_token_lossr   r&   r&   r'   rA   S  st    &
&  



z)BrosSpadeEEForTokenClassification.forward)NNNNNNNNNNNNN)r   r    r!   r  r,   r   r   r   r   r   r   r   r#   rC   r   r   r   rA   rD   r&   r&   r2   r'   r  -  sB   
             r  z
    Bros Model with a token classification head on top (a entity_linker layer on top of the hidden-states output) e.g.
    for Entity-Linking. The entity_linker is used to predict intra-entity links (one entity to another entity).
    c                       s   e Zd ZdgZ fddZeedee	e
dd
eej eej eej eej eej eej eej eej eej ee ee ee eeej e	f ddd	Z  ZS )!BrosSpadeELForTokenClassificationr   c                    s`   t  | || _|j| _|j| _|j| _t|| _t	|drD|j
n|j t|| _|   d S r  )r+   r,   r1   r  r   rc   r   r   r   rt   r  rk   r   entity_linkerr   rJ   r2   r&   r'   r,     s    

z*BrosSpadeELForTokenClassification.__init__r   r   Nr  c                 C   s  |dk	r|n| j j}| j||||||||
||d
}|d }|dd }| ||d}d}|	dk	rBt }|j\}}|j	}t
||d | }|d}t
j| t
j|dgt
jd|gdd}||dddddf t
|jj}||dddddf t
|jj}||d|d | |	d| }|sr|f|dd  }|dk	rn|f| S |S t|||j|jd	S )
a  
        Returns:

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeELForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeELForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```Nr  r   r   r7   r   r   r6   r
  )r1   r   r   rV   r   r  r  r   r   r`   r#   r  r   r   r;   r<   ro   r  r  r_   r  r   r   r   )r0   rq   rK   r   r  r]   r\   r   rr   r	  r   r   r   r   r  r  r   r  r   r  r`   r  maskr   r&   r&   r'   rA     sR    $


(($z)BrosSpadeELForTokenClassification.forward)NNNNNNNNNNNNr  r&   r&   r2   r'   r    s>   
            r  )>r"   r   dataclassesr   typingr   r   r   r   r#   Ztorch.utils.checkpointr   Ztorch.nnr   Zactivationsr
   Zmodeling_outputsr   r   r   Zmodeling_utilsr   Zpytorch_utilsr   r   r   r   r   r   r   r   r   Zconfiguration_brosr   Z
get_loggerr   r   Z_CHECKPOINT_FOR_DOCr   Z"BROS_PRETRAINED_MODEL_ARCHIVE_LISTZBROS_START_DOCSTRINGr   r   Moduler(   rE   rQ   rW   r{   r   r   r   r   r   r   r   r   r   r   r  r  r  r&   r&   r&   r'   <module>   sx   
DB 7Zd" ,a
 	