U
    9%emS                    @   sr  d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZ d dlZd dlZd dlmZ d dlmZ ddlmZ ddlmZ dd	lmZmZmZmZmZmZ d
dlm Z  d
dl!m"Z"m#Z#m$Z$ d
dl%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1 e2e3Z4dZ5dZ6eG dd deZ7dZ8dd Z9dd Z:d`e	ej; e<ej;dddZ=ej;e>dddZ?ej;e	e> dddZ@d d! ZAdad$d%ZBd&d' ZCG d(d) d)ejDZEG d*d+ d+ejFZGejHjIdbej;e>ej;d-d.d/ZJG d0d1 d1ejFZKG d2d3 d3ejFZLG d4d5 d5ejFZMG d6d7 d7e$ZNG d8d9 d9ejFZOG d:d; d;ejFZPG d<d= d=ejFZQG d>d? d?ejFZRG d@dA dAejFZSG dBdC dCejFZTG dDdE dEZUdcdGdHZVdIdJ ZWG dKdL dLejFZXG dMdN dNejFZYG dOdP dPejFZZG dQdR dRejFZ[G dSdT dTejFZ\G dUdV dVejFZ]G dWdX dXejFZ^G dYdZ dZejFZ_G d[d\ d\ejFZ`ed]e"G d^d_ d_e$ZadS )d    N)	dataclass)partial)CallableDictListOptionalSequenceTupleUnion)	LayerNorm   )is_deepspeed_available)ModelOutput)ContextManagersadd_start_docstrings%add_start_docstrings_to_model_forwardis_scipy_availableloggingreplace_return_docstrings   )	EsmConfig)ESM_START_DOCSTRINGEsmModelEsmPreTrainedModel)	OFProteinRigidRotationatom14_to_atom37chunk_layercompute_predicted_aligned_error
compute_tm-frames_and_literature_positions_to_atom14_posmake_atom14_masksresidue_constantsto_pdbtorsion_angles_to_frameszfacebook/esmfold_v1r   c                   @   sT  e Zd ZU dZdZejed< dZejed< dZ	ejed< dZ
ejed< dZejed< dZejed< dZejed	< dZejed
< dZejed< dZejed< dZejed< dZejed< dZejed< dZejed< dZejed< dZejed< dZejed< dZejed< dZejed< dZejed< dZejed< dZejed< dZejed< dS )EsmForProteinFoldingOutputa!  
    Output type of [`EsmForProteinFoldingOutput`].

    Args:
        frames (`torch.FloatTensor`):
            Output frames.
        sidechain_frames (`torch.FloatTensor`):
            Output sidechain frames.
        unnormalized_angles (`torch.FloatTensor`):
            Predicted unnormalized backbone and side chain torsion angles.
        angles (`torch.FloatTensor`):
            Predicted backbone and side chain torsion angles.
        positions (`torch.FloatTensor`):
            Predicted positions of the backbone and side chain atoms.
        states (`torch.FloatTensor`):
            Hidden states from the protein folding trunk.
        s_s (`torch.FloatTensor`):
            Per-residue embeddings derived by concatenating the hidden states of each layer of the ESM-2 LM stem.
        s_z (`torch.FloatTensor`):
            Pairwise residue embeddings.
        distogram_logits (`torch.FloatTensor`):
            Input logits to the distogram used to compute residue distances.
        lm_logits (`torch.FloatTensor`):
            Logits output by the ESM-2 protein language model stem.
        aatype (`torch.FloatTensor`):
            Input amino acids (AlphaFold2 indices).
        atom14_atom_exists (`torch.FloatTensor`):
            Whether each atom exists in the atom14 representation.
        residx_atom14_to_atom37 (`torch.FloatTensor`):
            Mapping between atoms in the atom14 and atom37 representations.
        residx_atom37_to_atom14 (`torch.FloatTensor`):
            Mapping between atoms in the atom37 and atom14 representations.
        atom37_atom_exists (`torch.FloatTensor`):
            Whether each atom exists in the atom37 representation.
        residue_index (`torch.FloatTensor`):
            The index of each residue in the protein chain. Unless internal padding tokens are used, this will just be
            a sequence of integers from 0 to `sequence_length`.
        lddt_head (`torch.FloatTensor`):
            Raw outputs from the lddt head used to compute plddt.
        plddt (`torch.FloatTensor`):
            Per-residue confidence scores. Regions of low confidence may indicate areas where the model's prediction is
            uncertain, or where the protein structure is disordered.
        ptm_logits (`torch.FloatTensor`):
            Raw logits used for computing ptm.
        ptm (`torch.FloatTensor`):
            TM-score output representing the model's high-level confidence in the overall structure.
        aligned_confidence_probs (`torch.FloatTensor`):
            Per-residue confidence scores for the aligned structure.
        predicted_aligned_error (`torch.FloatTensor`):
            Predicted error between the model's prediction and the ground truth.
        max_predicted_aligned_error (`torch.FloatTensor`):
            Per-sample maximum predicted error.
    Nframessidechain_framesunnormalized_anglesangles	positionsstatess_ss_zdistogram_logits	lm_logitsaatypeatom14_atom_existsresidx_atom14_to_atom37residx_atom37_to_atom14atom37_atom_existsresidue_index	lddt_headplddt
ptm_logitsptmaligned_confidence_probspredicted_aligned_errormax_predicted_aligned_error)__name__
__module____qualname____doc__r'   torchZFloatTensor__annotations__r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=    rD   rD   g/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/transformers/models/esm/modeling_esmfold.pyr&   ;   s0   
6r&   aJ  
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        masking_pattern (`torch.LongTensor` of shape `({0})`, *optional*):
            Locations of tokens to mask during training as a form of regularization. Mask values selected in `[0, 1]`.
        num_recycles (`int`, *optional*, defaults to `None`):
            Number of times to recycle the input sequence. If `None`, defaults to `config.num_recycles`. "Recycling"
            consists of passing the output of the folding trunk back in as input to the trunk. During training, the
            number of recycles should vary with each batch, to ensure that the model learns to output valid predictions
            after each recycle. During inference, num_recycles should be set to the highest value that the model was
            trained with for maximum accuracy. Accordingly, when this value is set to `None`, config.max_recycles is
            used.
c                  C   s   t  t jk} | ot  } | S N)rB   Zget_autocast_gpu_dtypeZfloat16Zis_autocast_enabled)Zfp16_enabledrD   rD   rE   is_fp16_enabled   s    rG   c                  C   s:   t  r
dS zdd l} | j W S  tk
r4   Y dS X d S )NFr   )r   	deepspeedutilsZis_initialized	Exception)rH   rD   rD   rE   is_deepspeed_initialized   s    rK   )samplespad_vreturnc                 C   s   t | dkrt S t dd | D dkrBtddd | D  tdd | D \}d	d td
d | D  D }tjt | f|| d j|d}|| t	t | D ],}|| }| | }||tdd |j
D < q|S )z
    Takes a list of tensors with the following dimensions:
        [(d_11, ..., d_1K),
         (d_21, ..., d_2K), ..., (d_N1, ..., d_NK)]
    and stack + pads them into a single tensor of:
    (N, max_i=1,N { d_i1 }, ..., max_i=1,N {diK})
    r   c                 S   s   h | ]}|  qS rD   dim.0xrD   rD   rE   	<setcomp>   s     z(collate_dense_tensors.<locals>.<setcomp>r   z Samples has varying dimensions: c                 S   s   g | ]}|  qS rD   rO   rQ   rD   rD   rE   
<listcomp>   s     z)collate_dense_tensors.<locals>.<listcomp>c                 S   s   h | ]
}|j qS rD   devicerQ   rD   rD   rE   rT      s     c                 S   s   g | ]}t |qS rD   )max)rR   lstrD   rD   rE   rU      s     c                 S   s   g | ]
}|j qS rD   shaperQ   rD   rD   rE   rU      s     )dtyperW   c                 s   s   | ]}t d |V  qdS )r   Nslice)rR   krD   rD   rE   	<genexpr>   s     z(collate_dense_tensors.<locals>.<genexpr>)lenrB   TensorRuntimeErrortuplezipemptyr\   fill_ranger[   )rL   rM   rW   Z	max_shaperesultiZresult_itrD   rD   rE   collate_dense_tensors   s    "
rl   rk   Zno_dimsc                 C   s   |  | jd |  d S )N))reshaper[   rm   rD   rD   rE   flatten_final_dims   s    rp   )tensorindsc                    sB   dt |  ttt | jd   }| | fdd|D  S )Nrn   c                    s   g | ]} | qS rD   rD   )rR   rj   Z
zero_indexrD   rE   rU      s     z&permute_final_dims.<locals>.<listcomp>)ra   listrh   r[   permute)rq   rr   Z
first_indsrD   rs   rE   permute_final_dims   s    rv   c                    s\   |d }i }|  D ]B\ } fdd|D }t|tkrJt| || < q| || < q|S )Nr   c                    s   g | ]}|  qS rD   rD   )rR   dr_   rD   rE   rU      s     z!dict_multimap.<locals>.<listcomp>)itemstypedictdict_multimap)fnZdictsfirstZnew_dictvZall_vrD   rx   rE   r|      s    r|         ?fan_inc                 C   s   | j }|td|d  }t sTtd t|}tjj	j
| |djdd| d nbddlm} t||jd	d
ddd }|jd	d
d||  d}t||}| tj|| jd d S )Nr   zlThis init requires scipy, but scipy was not found, default to an approximation that might not be equivalent.)std               @)minrX   r   )	truncnorm   )ablocscale)r   r   r   r   sizerV   )r[   rX   r   loggerwarningmathsqrtrB   nninitZnormal_clampZscipy.statsr   r   ZrvsZnumelnpro   copy_rq   rW   )weightsr   fanr[   r   r   rL   rD   rD   rE   trunc_normal_init_   s    
"r   c              	   C   s&   t   d}| | W 5 Q R X d S )NgabR?)rB   no_gradrg   )r   Zsoftplus_inverse_1rD   rD   rE   ipa_point_weights_init_  s    
r   c                	       sD   e Zd ZdZdeeeeeee	j
e	j
gdf  d fddZ  ZS )	EsmFoldLinearz
    A Linear layer with built-in nonstandard initializations. Called just like torch.nn.Linear.

    Implements the initializers in 1.11.4, plus some additional ones found in the code.
    TdefaultN)in_dimout_dimbiasr   init_fnc              	      sV   t  j|||d |r6t  | jd W 5 Q R X || _|| _|dkrRtddS )aM  
        Args:
            in_dim:
                The final dimension of inputs to the layer
            out_dim:
                The final dimension of layer outputs
            bias:
                Whether to learn an additive bias. True by default
            init:
                The initializer to use. Choose from:

                "default": LeCun fan-in truncated normal initialization "relu": He initialization w/ truncated normal
                distribution "glorot": Fan-average Glorot uniform initialization "gating": Weights=0, Bias=1 "normal":
                Normal initialization with std=1/sqrt(fan_in) "final": Weights=0, Bias=0

                Overridden by init_fn if the latter is not None.
            init_fn:
                A custom initializer taking weight and bias as inputs. Overrides init if not None.
        r   r   )r   reluglorotgatingnormalfinalzInvalid init string.N)	super__init__rB   r   r   rg   r   r   
ValueError)selfr   r   r   r   r   	__class__rD   rE   r     s    
zEsmFoldLinear.__init__)Tr   N)r>   r?   r@   rA   intboolstrr   r   rB   rb   r   __classcell__rD   rD   r   rE   r     s   
   r   c                       s&   e Zd Zd fdd	Zdd Z  ZS )EsmFoldLayerNormh㈵>c                    s@   t    |f| _|| _tt|| _tt	|| _
d S rF   )r   r   c_inepsr   	ParameterrB   Zonesweightzerosr   )r   r   r   r   rD   rE   r   :  s
    
zEsmFoldLayerNorm.__init__c              	   C   s   |j }|tjkr`t s`tjjjdd2 tj	|| j
| jj|d| jj|d| j}W 5 Q R X ntj	|| j
| j| j| j}|S )NFenabledr\   )r\   rB   bfloat16rK   cudaampautocastr   
functional
layer_normr   r   tor   r   )r   rS   rw   outrD   rD   rE   forwardC  s    8zEsmFoldLayerNorm.forward)r   r>   r?   r@   r   r   r   rD   rD   r   rE   r   9  s   	r   rn   )rk   rP   rN   c              	   C   s\   | j }|tjkrFt sFtjjjdd tjjj	| |d}W 5 Q R X ntjjj	| |d}|S )z[
    Softmax, but without automatic casting to fp32 when the input is of type bfloat16
    Fr   rO   )
r\   rB   r   rK   r   r   r   r   r   softmax)rk   rP   rw   srD   rD   rE   softmax_no_castN  s    r   c                       s   e Zd ZdZdeeeeeed fddZejeje	ejejejf dddZ
ejejejd	d
dZdejejeeej  eeeeeeej ejd
ddZ  ZS )EsmFoldAttentionzu
    Standard multi-head attention using AlphaFold's default layer initialization. Allows multiple bias vectors.
    T)c_qc_kc_vc_hiddenno_headsr   c                    s   t    || _|| _|| _|| _|| _|| _t| j| j| j ddd| _	t| j| j| j ddd| _
t| j| j| j ddd| _t| j| j | jdd| _d| _| jrt| j| j| j dd| _t | _dS )a  
        Args:
            c_q:
                Input dimension of query data
            c_k:
                Input dimension of key data
            c_v:
                Input dimension of value data
            c_hidden:
                Per-head hidden dimension
            no_heads:
                Number of attention heads
            gating:
                Whether the output should be gated using query data
        Fr   r   r   r   r   Nr   )r   r   r   r   r   r   r   r   r   linear_qlinear_klinear_vlinear_olinear_gr   Sigmoidsigmoid)r   r   r   r   r   r   r   r   rD   rE   r   b  s    
zEsmFoldAttention.__init__)q_xkv_xrN   c                 C   s   |  |}| |}| |}||jd d | jdf }||jd d | jdf }||jd d | jdf }|dd}|dd}|dd}|t| j	 }|||fS )Nrn   r   )
r   r   r   viewr[   r   	transposer   r   r   )r   r   r   qr_   r   rD   rD   rE   	_prep_qkv  s    


zEsmFoldAttention._prep_qkv)or   rN   c                 C   sX   | j d k	r@| |  |}||jd d | jdf }|| }t|d}| |}|S )Nrn   r   )r   r   r   r[   r   rp   r   )r   r   r   grD   rD   rE   _wrap_up  s    


zEsmFoldAttention._wrap_upNF      )
r   r   biasesuse_memory_efficient_kerneluse_lmalma_q_chunk_sizelma_kv_chunk_size	use_flash
flash_maskrN   c
                 C   s   |r|dks|dkrt d|r0|dk	r0t d|||g}
t|
dkrNt d|dkrZg }| ||\}}}t|d}t||}|D ]}||7 }qt|d}t||}|dd	}| ||}|S )
a{  
        Args:
            q_x:
                [*, Q, C_q] query data
            kv_x:
                [*, K, C_k] key data
            biases:
                List of biases that broadcast to [*, H, Q, K]
            use_memory_efficient_kernel:
                Whether to use a custom memory-efficient attention kernel. This should be the default choice for most.
                If none of the "use_<...>" flags are True, a stock PyTorch implementation is used instead
            use_lma:
                Whether to use low-memory attention (Staats & Rabe 2021). If none of the "use_<...>" flags are True, a
                stock PyTorch implementation is used instead
            lma_q_chunk_size:
                Query chunk size (for LMA)
            lma_kv_chunk_size:
                Key/Value chunk size (for LMA)
        Returns
            [*, Q, C_q] attention update
        NzPIf use_lma is specified, lma_q_chunk_size and lma_kv_chunk_size must be providedzSuse_flash is incompatible with the bias option. For masking, use flash_mask insteadr   z2Choose at most one alternative attention algorithm)r   r   rn   r   r   )	r   sumr   rv   rB   matmulr   r   r   )r   r   r   r   r   r   r   r   r   r   Zattn_optionsquerykeyvalueoutputr   rD   rD   rE   r     s&    !



zEsmFoldAttention.forward)T)NFFr   r   FN)r>   r?   r@   rA   r   r   r   rB   rb   r	   r   r   r   r   r   r   rD   rD   r   rE   r   ]  s>    /&       r   c                
       sv   e Zd Zd fdd	Zejjdejeej e	e
e
e
ejdddZdejeej ee	 e
e
e
ejd
ddZ  ZS )EsmFoldTriangleAttentionT    eAc                    sh   t    || _|| _|| _|| _|| _t| j| _t	|| jddd| _
t| j| j| j| j| j| _dS )z
        Args:
            c_in:
                Input channel dimension
            c_hidden:
                Overall hidden channel dimension (not per-head)
            no_heads:
                Number of attention heads
        Fr   r   N)r   r   r   r   r   startinginfr   r   r   linearr   mha)r   r   r   r   r   r   r   rD   rE   r     s    

z!EsmFoldTriangleAttention.__init__F)rS   r   
chunk_sizer   r   inplace_saferN   c                 C   s@   |||d}t t| j||d||t|jdd |r8|nddS )ztriangle! triangle!)r   r   r   )r   r   Nr   )r   Zno_batch_dimsZ_out)r   r   r   ra   r[   )r   rS   r   r   r   r   r   Z
mha_inputsrD   rD   rE   _chunk  s    
zEsmFoldTriangleAttention._chunkN)rS   maskr   r   r   r   rN   c           
      C   s   |dkr| |jdd }| js:|dd}|dd}| |}| j|d  dddddddf }t| |d}|d}||g}	|dk	r| j	||	||||d	}n| j
|||	||d
}| js|dd}|S )z
        Args:
            x:
                [*, I, J, C_in] input tensor (e.g. the pair representation)
        Returns:
            [*, I, J, C_in] output tensor
        Nrn   r   r   r   .r   r   r   )r   r   r   )r   r   r   r   r   )new_onesr[   r   r   r   r   rv   r   	unsqueezer   r   )
r   rS   r   r   r   r   r   Z	mask_biasZtriangle_biasr   rD   rD   rE   r   '  s>    
$
	    z EsmFoldTriangleAttention.forward)Tr   )FFF)NNFFF)r>   r?   r@   r   rB   jitignorerb   r   r   r   r   r   r   r   rD   rD   r   rE   r     s8           r   c                       s   e Zd ZdZd fdd	Zdejejee ejdddZ	dejeej ee e
d	d
dZdejeej e
e
ee ejdddZ  ZS )#EsmFoldTriangleMultiplicativeUpdatez*
    Implements Algorithms 11 and 12.
    Tc                    s   t    |j}|| _t||| _t||dd| _t||| _t||dd| _t||dd| _	t||dd| _
t|| _t|| _t | _d S )Nr   r   r   )r   r   pairwise_state_dim	_outgoingr   
linear_a_p
linear_a_g
linear_b_p
linear_b_gr   linear_zr   layer_norm_inlayer_norm_outr   r   r   )r   configr  r   r   rD   rE   r   h  s    


z,EsmFoldTriangleMultiplicativeUpdate.__init__N)r   r   _inplace_chunk_sizerN   c                 C   s   | j rt|d}t|d}nt|d}t|d}|d k	rtd|jd |D ]l}|d||| d d d d f }|d||| d d d d f }t|||d||| d d d d f< qJ|}nt||}t|dS )Nr   )r   r   r   r   r   .r   r   r   )r  rv   rh   r[   rB   r   )r   r   r   r  rj   Za_chunkb_chunkprD   rD   rE   _combine_projectionsy  s     


  "z8EsmFoldTriangleMultiplicativeUpdate._combine_projections)zr   inplace_chunk_sizewith_addc                     s8  |dkr| |jdd }|d}dfdd	dfdd	}|||ddd}dk	r|jd d	 d	  d
d jrn }dd fdd fdd}t|j}	|	 < ||	}
|
}td| < |
||  d}ttd}dd t	||dd g D }tt}fdd|D }t	|| || }|D ]j\}}|s|kr||
|}
d}|||| |}|||| |}|
 }| krڈ||||  }nN|s|}td| < |
||| ||< n| }|
||| }|||ddd}~t||}t|d}|}|}||||  }|}|  ~||9 }|}t||| | < |r||  |7  < n|||< qhn\|||dd}t||}|}|}|}|  ||9 }|r0||7 }n|}|S )a  
        Args:
            z:
                A [*, N, N, C_z] pair representation
            mask:
                A [*, N, N] pair mask
            inplace_chunk_size:
                Size of chunks used in the main computation. Increase to trade memory for speed.
            with_add:
                If True, z is overwritten with (z + update). Otherwise, it is overwritten with (update).
        Returns:
            A reference to the overwritten z

        More memory-efficient, inference-only version of the forward function. Uses in-place operations, fusion of the
        addition that happens after this module in the Evoformer, a smidge of recomputation, and a cache of overwritten
        values to lower peak memory consumption of this module from 5x the size of the input tensor z to 2.5x its size.
        Useful for inference on extremely long sequences.

        It works as follows. We will make reference to variables used in the default forward implementation below.
        Naively, triangle multiplication attention requires the manifestation of 5 tensors the size of z: 1) z, the
        "square" input tensor, 2) a, the first projection of z, 3) b, the second projection of b, 4) g, a z-sized mask,
        and 5) a z-sized tensor for intermediate computations. For large N, this is prohibitively expensive; for
        N=4000, for example, z is more than 8GB alone. To avoid this problem, we compute b, g, and all intermediate
        tensors in small chunks, noting that the chunks required to compute a chunk of the output depend only on the
        tensor a and corresponding vertical and horizontal chunks of z. This suggests an algorithm that loops over
        pairs of chunks of z: hereafter "columns" and "rows" of z, even though each "column" and "row" in fact contains
        inplace_chunk_size contiguous true columns and rows of z. Writing output chunks to a new tensor would bring
        total memory consumption down to 3x the size of z. However, more memory can be saved by writing output chunks
        directly to z in-place. WLOG, we choose to write output chunks vertically, overwriting the ith "column" of z at
        the end of the ith iteration of the main loop. Despite this overwriting, the ith column is always one column
        ahead of previously overwritten columns and can be recovered directly from z. After the first iteration,
        however, the ith row of z is always at least partially overwritten. For this reason, we introduce the z-cache,
        a tensor one-half the size of z. The z-cache initially contains the left half (2nd and 3rd quadrants) of z. For
        0 < i < N/2, the missing left part of the ith row of z is recovered from this cache at the beginning of the ith
        iteration. Once i exceeds n/2, the cache is "reoriented" to encompass the 3rd and 4th quadrants of z instead.
        Though the 3rd quadrant of the original z is entirely overwritten at this point, it can be recovered from the
        z-cache itself. Thereafter, the ith row of z can be recovered in its entirety from the reoriented z-cache.
        After the final iteration, z has been completely overwritten and contains the triangular multiplicative update.
        If with_add is True, it instead contains the sum of z and the triangular multiplicative update. In either case,
        peak memory consumption is just 2.5x the size of z, disregarding memory used for chunks and other small
        variables.
        Nrn   Tc                    sZ   |r j } j}n j} j} | } || }|  ||| 9 }||9 }t|d}|S )Nr   )r  r  r  r  r	  sigmoid_rv   )pairr   r   r   Zlinear_pr  r   rD   rE   compute_projection_helper  s    

zYEsmFoldTriangleMultiplicativeUpdate._inference_forward.<locals>.compute_projection_helperc              	      s:  j |A }|s. | ||}|r*|dd}n|r8jnj}|jjd }| jd d |f | jdd  }| |}td| jd D ]}	| d|	|	 d d d d f }
 | d|	|	 d d d d f |d|	|	 d d d d f |}
|r|
dd}
|
|d|	|	 f< n|
|d|	|	 d d f< ~
q|S )Nrn   r   r   r   .)r  r   r  r  r   r[   	new_zerosrh   )r  r   r   chunkedZneed_transposer  r   cZ	out_shaperj   Z
pair_chunk)r  r  r   rD   rE   compute_projection  s,    
"
 zREsmFoldTriangleMultiplicativeUpdate._inference_forward.<locals>.compute_projection)r  r   r   r   c                 S   s   dd | j D S )Nc                 S   s   g | ]}t d qS rF   r]   rR   _rD   rD   rE   rU     s     z`EsmFoldTriangleMultiplicativeUpdate._inference_forward.<locals>.empty_slicer.<locals>.<listcomp>rZ   )rk   rD   rD   rE   empty_slicer  s    zLEsmFoldTriangleMultiplicativeUpdate._inference_forward.<locals>.empty_slicerc                    s    | }t ||||< | | S rF   r]   )rk   startendrP   r   )r  rD   rE   slice_tensor  s    zLEsmFoldTriangleMultiplicativeUpdate._inference_forward.<locals>.slice_tensorc                    s   | d }|   } | dd d d d d d f } | }td| < || |< |d }|d  }| }td | < || |< | S )N.r   r   )r   r^   )z_cacher  Z
quadrant_3Zfirst_half_slicerZ
quadrant_4Zquadrant_3_slicer)col_dimr  half_nnrow_dimr!  rD   rE   flip_z_cache_  s     zMEsmFoldTriangleMultiplicativeUpdate._inference_forward.<locals>.flip_z_cache_r   Fc                 S   s   g | ]\}}|| qS rD   rD   )rR   Zi_1Zi_2rD   rD   rE   rU   4  s     zJEsmFoldTriangleMultiplicativeUpdate._inference_forward.<locals>.<listcomp>r   c                    s   g | ]} qS rD   rD   r  )r  rD   rE   rU   6  s     )r   r  r  )T)TT)r   r[   r   r  rt   r  r^   r   rh   re   clonerB   r   rv   r
  r  r   r	  r  ) r   r  r   r  r  r  r   Zb_chunk_dimr'  Zz_cache_shaper"  Zz_cache_slicerZz_cache_rotatedZi_rangeZinitial_offsetsZ
after_halfZafter_half_offsetsZcombined_range_with_offsetsrj   offsetZ	z_chunk_bZ
mask_chunkZz_chunk_slicerZz_cache_offsetr  Zx_chunkZ	z_chunk_gZg_chunkZz_slicerr   rS   r   rD   )	r#  r  r  r$  r  r%  r&  r   r!  rE   _inference_forward  s    1
!



"








z6EsmFoldTriangleMultiplicativeUpdate._inference_forwardF   )r  r   r   _add_with_inplacer  rN   c           
   	   C   s  |r| j ||||d}|S |dkr6||jdd }|d}| |}|}|| | | }|| | }|}|| | | }|| 	| }t
 rtjjjdd | | | }W 5 Q R X n| ||}~~| |}| |}| | |}	||	 }|S )z
        Args:
            x:
                [*, N_res, N_res, C_z] input tensor
            mask:
                [*, N_res, N_res] input mask
        Returns:
            [*, N_res, N_res, C_z] output tensor
        )r  r  Nrn   Fr   )r*  r   r[   r   r	  r   r  r  r  r  rG   rB   r   r   r   r  floatr
  r  r   )
r   r  r   r   r,  r  rS   r   r   r   rD   rD   rE   r   v  s8    

 

z+EsmFoldTriangleMultiplicativeUpdate.forward)T)N)NNT)NFFr+  )r>   r?   r@   rA   r   rB   rb   r   r   r  r   r*  r   r   rD   rD   r   rE   r  c  s>          g    r  c                       s    e Zd ZdZ fddZ  ZS )EsmFoldPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    c              	      s  t |trt  |jdk	r0||j|j n|jdkrJt|jdd n|jdkrdt|jdd n~|jdkrt	jj
|jdd	 n`|jd
kr|jd |jr|jd n6|jdkrtj	jj|jdd n|jdkr|jd W 5 Q R X nt |tr
t|j nt |trtj	j|jjj tj	j|jjj tj	j|jjj tj	j|jjj tj	j|jjjj tj	j|jjjj tj	j|jjjj tj	j|jjjj tj	j|jjj tj	j|jjj tj	j|jjj tj	j|jjj tj	j|jjj tj	j|jjd j tj	j|jjd j tj	j|j jd j tj	j|j jd j nt! "| dS )zInitialize the weightsNr   r   )r   r   r   r   r   )Zgainr   r   r   r   )Znonlinearityr   r   )#
isinstancer   rB   r   r   r   r   r   r   r   Zxavier_uniform_rg   Zkaiming_normal_EsmFoldInvariantPointAttentionr   head_weights#EsmFoldTriangularSelfAttentionBlockzeros_
tri_mul_inr  tri_mul_outtri_att_startr   r   tri_att_endsequence_to_pairo_projpair_to_sequencer   seq_attentionmlp_seqmlpmlp_pairr   _init_weights)r   moduler   rD   rE   r?    sN    








z$EsmFoldPreTrainedModel._init_weights)r>   r?   r@   rA   r?  r   rD   rD   r   rE   r.    s   r.  c                       s(   e Zd Zd fdd	ZdddZ  ZS )	EsmFoldSelfAttentionFc                    s   t    ||| kst|| _|| _|| _tj||d dd| _tj||dd| _	|| _
|rt||| _tjj| jj tjj| jj | jd | _tjj| j	j d S )Nr   Fr   T      )r   r   AssertionError	embed_dim	num_heads
head_widthr   Linearprojr9  gatedg_projrB   r   r3  r   Zones_r   rescale_factor)r   rD  rE  rF  rI  r   rD   rE   r     s    
zEsmFoldSelfAttention.__init__Nc                 C   s  |  |j|jdd | jdf }|dddd}|jddd\}}}| j| }td||}	|dk	r||	|dddd }	|dk	r|ddddf }|		|d	kt
j }	tjj|	dd}	td
|	|}
|
j|
jdd d }
| jr| | |
 }
| |
}
|
|	ddddfS )a  
        Basic self attention with optional mask and external pairwise bias. To handle sequences of different lengths,
        use mask.

        Inputs:
            x: batch of input sequneces (.. x L x C) mask: batch of boolean masks where 1=valid, 0=padding position (..
            x L_k) bias: batch of scalar pairwise attention biases (.. x Lq x Lk x num_heads)

        Outputs:
          sequence projection (B x L x embed_dim), attention maps (B x L x L x num_heads)
        Nr   rn   r   r   r   rO   z...qc,...kc->...qkFz...hqk,...hkc->...qhc)rn   )rH  r   r[   rE  ru   chunkrK  rB   Zeinsummasked_fillr   r   r   r   r   ro   rI  rJ  r   r9  )r   rS   r   r   indicesrk   r   r_   r   r   yrD   rD   rE   r     s"    $

zEsmFoldSelfAttention.forward)F)NNNr   rD   rD   r   rE   rA    s   rA  c                       sH   e Zd ZdZeeeee f d fddZe	j
e	j
dddZ  ZS )EsmFoldDropoutzl
    Implementation of dropout with the ability to share the dropout mask along a particular dimension.
    )r	batch_dimc                    s:   t    || _t|tkr"|g}|| _t| j| _d S rF   )	r   r   rQ  rz   r   rR  r   Dropoutdropout)r   rQ  rR  r   rD   rE   r   #  s    
zEsmFoldDropout.__init__)rS   rN   c                 C   s<   t |j}| jd k	r(| jD ]}d||< q|| || S )Nr   )rt   r[   rR  rT  r   )r   rS   r[   ZbdrD   rD   rE   r   ,  s
    



zEsmFoldDropout.forward)r>   r?   r@   rA   r-  r
   r   r   r   rB   rb   r   r   rD   rD   r   rE   rP    s    	rP  c                       s$   e Zd Z fddZdd Z  ZS )EsmFoldSequenceToPairc                    sj   t    t|| _tj||d dd| _tjd| |dd| _tjj	
| jj tjj	
| jj d S )Nr   Tr   )r   r   r   r   	layernormrG  rH  r9  rB   r   r3  r   )r   sequence_state_dim	inner_dimr  r   rD   rE   r   5  s    
zEsmFoldSequenceToPair.__init__c                 C   s   t |jdkst| |}| |}|jddd\}}|dddddddf |dddddddf  }|dddddddf |dddddddf  }tj||gdd}| |}|S )z
        Inputs:
          sequence_state: B x L x sequence_state_dim

        Output:
          pairwise_state: B x L x L x pairwise_state_dim

        Intermediate state:
          B x L x L x 2*inner_dim
        r   r   rn   rO   N)	ra   r[   rC  rV  rH  rL  rB   catr9  )r   sequence_stater   r   r_   proddiffrS   rD   rD   rE   r   ?  s    

88
zEsmFoldSequenceToPair.forwardr   rD   rD   r   rE   rU  4  s   
rU  c                       s$   e Zd Z fddZdd Z  ZS )EsmFoldPairToSequencec                    s,   t    t|| _tj||dd| _d S )NFr   )r   r   r   r   rV  rG  r   )r   r  rE  r   rD   rE   r   [  s    
zEsmFoldPairToSequence.__init__c                 C   s*   t |jdkst| |}| |}|S )z
        Inputs:
          pairwise_state: B x L x L x pairwise_state_dim

        Output:
          pairwise_bias: B x L x L x num_heads
           )ra   r[   rC  rV  r   )r   pairwise_stater  Zpairwise_biasrD   rD   rE   r   a  s    

zEsmFoldPairToSequence.forwardr   rD   rD   r   rE   r]  Z  s   r]  c                       s&   e Zd Zd fdd	Zdd Z  ZS )EsmFoldResidueMLPr   c              	      sB   t    tt|t||t t||t|| _d S rF   )	r   r   r   
Sequentialr   rG  ReLUrS  r=  )r   rD  rX  rT  r   rD   rE   r   p  s    


zEsmFoldResidueMLP.__init__c                 C   s   ||  | S rF   )r=  )r   rS   rD   rD   rE   r   {  s    zEsmFoldResidueMLP.forward)r   r   rD   rD   r   rE   r`  o  s   r`  c                       s&   e Zd Z fddZdddZ  ZS )r2  c                    s  t    || _|j}|j}||j }||j }t|| _	t
||d || _t||| _t|||jdd| _t|dd| _t|dd| _t||j|ddd| _t||j|ddd| _t|d| |jd	| _t|d| |jd	| _t|j| _t|jd d| _t|jd d
| _d S )Nr   T)rI  )r  Fr   )r   r   r^  )rT  r   )r   r   r  rW  r  Zsequence_head_widthZpairwise_head_widthr   r   layernorm_1rU  r8  r]  r:  rA  r;  r  r5  r4  r   r6  r7  r`  rT  r<  r>  rS  droprP  row_dropcol_drop)r   r  rW  r  Zsequence_num_headsZpairwise_num_headsr   rD   rE   r     sH    


           z,EsmFoldTriangularSelfAttentionBlock.__init__Nc                 K   s0  t |jdkr$tdt |j dt |jdkrHtdt |j d|dk	rtt |jdkrttdt |j d|j\}}}|jd }	|| jjkrtd	| d
| jj d|	| jjkrtd|	 d
| jj d||jd krtd| d
|jd  d||jd ks"||jd krJtd| d
|jd  d|jd  d| |}
| |}| j|||
d\}}|| 	| }| 
|}|| | }|dk	r|d|d nd}|| | j||d }|| | j||d }|| | j|||d }|| | j|||d }| |}||fS )a*  
        Inputs:
          sequence_state: B x L x sequence_state_dim pairwise_state: B x L x L x pairwise_state_dim mask: B x L boolean
          tensor of valid positions

        Output:
          sequence_state: B x L x sequence_state_dim pairwise_state: B x L x L x pairwise_state_dim
        r   z,`sequence_state` should be a 3d-tensor, got  dims.r^  z,`pairwise_state` should be a 4d-tensor, got Nr   "`mask` should be a 2d-tensor, got zQ`sequence_state` last dimension should be equal to `self.sequence_state_dim`. Got != .zR`pairwise_state` last dimension should be equal to `self.pairwise_state_dim`. Got r   zD`sequence_state` and `pairwise_state` have inconsistent batch size: r   zI`sequence_state` and `pairwise_state` have inconsistent sequence length: z or )r   r   r   )r   r   )ra   r[   r   r  rW  r  r:  rc  r;  rd  r<  r8  r   re  r5  rf  r4  r6  r7  r>  )r   rZ  r_  r   r   Z,_EsmFoldTriangularSelfAttentionBlock__kwargsrR  seq_dimrW  r  r   rO  r  Ztri_maskrD   rD   rE   r     sR    	
 "


"
z+EsmFoldTriangularSelfAttentionBlock.forward)NNr   rD   rD   r   rE   r2    s   "r2  c                   @   s&   e Zd ZdddZdd Zdd	 Zd
S )EsmCategoricalMixture2   r   r   c                 C   sH   || _ tj|||d | j j| j jd}|d d |dd   d | _d S )Nr   rW   r\   rn   r   )logitsrB   linspacerW   r\   v_bins)r   parambinsr  r   rD   rD   rE   r     s     zEsmCategoricalMixture.__init__c                 C   sN   | d| jd g|j    d}| jd}tj|| ddd	dS )Nrn   rO   )
r   rr  ndimabsZargminrp  Zlog_softmaxrB   Ztake_along_dimsqueeze)r   trueZ
true_indexZnllrD   rD   rE   log_prob  s    &zEsmCategoricalMixture.log_probc                 C   s   | j d| jd dS )Nrn   r   )rp  r   rr  r   rw  r  rD   rD   rE   mean  s    zEsmCategoricalMixture.meanN)rn  r   r   )r>   r?   r@   r   ry  rz  rD   rD   rD   rE   rm    s   
rm  rn  c                 C   s   t | |d S )Nrt  )rm  rz  )rp  rt  rD   rD   rE   categorical_lddt  s    r|  c                 C   sb   | dkrdS t | jdkr0tdt | j d| j\}}| d|||}||| |}|S )z
    Helper to convert B x L mask of valid positions to axial mask used in row column attentions.

    Input:
      mask: B x L tensor of booleans

    Output:
      mask: B x L x L tensor of booleans
    Nr   rh  rg  r   )ra   r[   r   r   expandro   )r   rR  rl  mrD   rD   rE   get_axial_mask  s    
r  c                       s&   e Zd Z fddZdddZ  ZS )EsmFoldRelativePositionc                    s2   t    |j| _tjd| j d |j| _d S Nr   )	r   r   Zposition_binsrt  rB   r   	Embeddingr  	embeddingr   r  r   rD   rE   r     s    
z EsmFoldRelativePosition.__init__Nc                 C   s   |j tjkrtd|j  d|dk	rL|j|jkrLtd|j d|j d|dddddf |dddddf  }|| j | j}|| j d }|dk	r|dddddf |dddddf  }d||d	k< | |}|S )
z
        Input:
          residue_index: B x L tensor of indices (dytpe=torch.long) mask: B x L tensor of booleans

        Output:
          pairwise_state: B x L x L x pairwise_state_dim tensor of embeddings
        z`residue_index` has dtype z, it should be `torch.long`.Nz5`residue_index` and `mask` have inconsistent shapes: ri  rj  r   r   F)r\   rB   longr   r[   r   rt  r  )r   r6   r   r\  r   rD   rD   rE   r     s    ,,
zEsmFoldRelativePosition.forward)Nr   rD   rD   r   rE   r    s   r  c                       s0   e Zd Z fddZejejdddZ  ZS )EsmFoldAngleResnetBlockc                    s@   t    t|j|jdd| _t|j|jdd| _t | _d S Nr   r   r   )	r   r   r   
resnet_dimlinear_1linear_2r   rb  r   r  r   rD   rE   r   :  s    
z EsmFoldAngleResnetBlock.__init__)r   rN   c                 C   s4   |}|  |}| |}|  |}| |}|| S rF   )r   r  r  )r   r   	s_initialrD   rD   rE   r   B  s    



zEsmFoldAngleResnetBlock.forward)r>   r?   r@   r   rB   rb   r   r   rD   rD   r   rE   r  9  s   r  c                       sB   e Zd ZdZ fddZejejeejejf dddZ  Z	S )EsmFoldAngleResnetz.
    Implements Algorithm 20, lines 11-14
    c                    s   t    || _t|j|j| _t|j|j| _t	 | _
t|jD ]}t|}| j
| qDt|j|jd | _t | _d S r  )r   r   r  r   sequence_dimr  	linear_inlinear_initialr   
ModuleListlayersrh   Znum_resnet_blocksr  appendZ
num_angles
linear_outrb  r   )r   r  r  layerr   rD   rE   r   R  s    

zEsmFoldAngleResnet.__init__)r   r  rN   c                 C   s   |  |}| |}|  |}| |}|| }| jD ]}||}q6|  |}| |}||jdd d }|}ttj	tj
|d ddd| jjd}|| }||fS )a  
        Args:
            s:
                [*, C_hidden] single embedding
            s_initial:
                [*, C_hidden] single embedding as of the start of the StructureModule
        Returns:
            [*, no_angles, 2] predicted angles
        Nrn   )rn   r   r   T)rP   Zkeepdim)r   )r   r  r  r  r  r   r[   rB   r   r   r   r  epsilon)r   r   r  lZunnormalized_sZ
norm_denomrD   rD   rE   r   b  s&    







zEsmFoldAngleResnet.forward
r>   r?   r@   rA   r   rB   rb   r	   r   r   rD   rD   r   rE   r  M  s   r  c                	       sR   e Zd ZdZ fddZd	ejeej eeje	ee
ej  ejdddZ  ZS )
r0  z"
    Implements Algorithm 22.
    c                    s
  t    || _|j}|j}|j| _|j| _|j	| _	|j
| _
|j|j }t||| _t|d| | _|j|j	 d }t||| _|j|j	|j
  d }t||| _t||j| _tt|j| _|j||j |j
d   }t||dd| _tjdd| _t | _d S )Nr   r   r^  r   r   rn   rO   )r   r   r  r  pairwise_dimZipa_dim
hidden_dimZnum_heads_iparE  num_qk_pointsnum_v_pointsr   r   	linear_kvlinear_q_pointslinear_kv_pointslinear_br   r   rB   r   r1  r  ZSoftmaxr   ZSoftplussoftplus)r   r  c_sc_zZhcZhpqZhpkvZconcat_out_dimr   rD   rE   r     s*    
z'EsmFoldInvariantPointAttention.__init__FN)r   r  rQ  r   _offload_inference_z_reference_listrN   c              	   C   s<  |g}|  |}| |}||jdd | jdf }||jdd | jdf }tj|| jdd\}	}
| |}tj||jd d dd}tj	|dd}|d 
|}||jdd | j| jdf }| |}tj||jd d dd}tj	|dd}|d 
|}||jdd | jddf }tj|| j| jgdd\}}| |d }|rt|d dksvt|d  |d< t rtjjjd	d
& tt| dt|	 d}W 5 Q R X ntt|dt|	d}|tdd| j  9 }|tdt|d 7 }|d|d }|d }ttj|dd}| | j jdt!|jdd  d  }|tdd| jd d    }|| }tj|ddd }|d|d }| j"j#|d  }t|d}|| }||d }| $|}t||
%ddj&|j'd%dd}t(|d}tj|dddddddf t|dddddddf  dd}t|d}|d )|}t(ttj|d dd| j"j* d}|j+|jdd d }|r|d &|j,|d< t|%dd|d j&|j'd}t(|d}| -tj.|ftj|dd||fddj&|d j'd}|S )ab  
        Args:
            s:
                [*, N_res, C_s] single representation
            z:
                [*, N_res, N_res, C_z] pair representation
            r:
                [*, N_res] transformation object
            mask:
                [*, N_res] mask
        Returns:
            [*, N_res, C_s] single representation update
        Nrn   rO   r   ).Nr   r   r   Fr   )r   r   r   r  r   gUUUUUU?r   r   )r   )rn   r   g      "@rB  r   r   r   .)r   r   r   r   )r   r   r   r   ).NN)rn   r   )/r   r  r   r[   rE  rB   splitr  r  stackapplyr  r  r  r  sysgetrefcountrC  cpurG   r   r   r   r   rv   r-  r   r   r   r   Zunbindr  r1  ra   r  r   r   r   r   r\   rp   Zinvert_applyr  ro   rW   r  rY  )r   r   r  rQ  r   r  r  r   kvr_   r   Zq_ptsZkv_ptsZk_ptsZv_ptsr   r   Zpt_attr1  Zsquare_maskr   Zo_ptZ	o_pt_normZo_pairrD   rD   rE   r     s    


"
 ( 

&
6
&"
0z&EsmFoldInvariantPointAttention.forward)FN)r>   r?   r@   rA   r   rB   rb   r   r   r   r   r   r   rD   rD   r   rE   r0    s   )  r0  c                       s>   e Zd ZdZ fddZejeejejf dddZ  Z	S )EsmFoldBackboneUpdatez*
    Implements part of Algorithm 23.
    c                    s    t    t|jddd| _d S )N   r   r   )r   r   r   r  r   r  r   rD   rE   r   X  s    
zEsmFoldBackboneUpdate.__init__)r   rN   c                 C   s   |  |}|S )z
        Args:
            [*, N_res, C_s] single representation
        Returns:
            [*, N_res, 6] update vector
        )r   )r   r   updaterD   rD   rE   r   ]  s    
zEsmFoldBackboneUpdate.forwardr  rD   rD   r   rE   r  S  s   r  c                       s$   e Zd Z fddZdd Z  ZS )%EsmFoldStructureModuleTransitionLayerc                    sT   t    t|j|jdd| _t|j|jdd| _t|j|jdd| _t | _	d S r  )
r   r   r   r  r  r  linear_3r   rb  r   r  r   rD   rE   r   k  s
    
z.EsmFoldStructureModuleTransitionLayer.__init__c                 C   sB   |}|  |}| |}| |}| |}| |}|| }|S rF   )r  r   r  r  )r   r   r  rD   rD   rE   r   t  s    




z-EsmFoldStructureModuleTransitionLayer.forwardr   rD   rD   r   rE   r  j  s   	r  c                       s$   e Zd Z fddZdd Z  ZS ) EsmFoldStructureModuleTransitionc                    s\   t    || _t | _t|jD ]}t|}| j	| q$t
|j| _t|j| _d S rF   )r   r   r  r   r  r  rh   Znum_transition_layersr  r  rS  dropout_raterT  r   r  r   )r   r  r  r  r   rD   rE   r     s    

z)EsmFoldStructureModuleTransition.__init__c                 C   s,   | j D ]}||}q| |}| |}|S rF   )r  rT  r   )r   r   r  rD   rD   rE   r     s
    



z(EsmFoldStructureModuleTransition.forwardr   rD   rD   r   rE   r    s   r  c                       s>   e Zd Z fddZdddZdd Zd	d
 Zdd Z  ZS )EsmFoldStructureModulec                    s~   t    || _t|j| _t|j| _t|j|j| _	t
|| _t|j| _t|j| _t|| _t|| _t|| _d S rF   )r   r   r  r   r  layer_norm_sr  layer_norm_zr   r  r0  ipar   rS  r  ipa_dropoutlayer_norm_ipar  
transitionr  	bb_updater  angle_resnetr  r   rD   rE   r     s    



zEsmFoldStructureModule.__init__NFc              
   C   s  |d }|dkr$| |jdd }| |}| |d }d}|rtt|d dksZt|d  |d< |g}d}|}| |}t	j
|jdd |j|j| jdd}	g }
t| jjD ]}|| j|||	|||d }| |}| |}| |}|	| |}	t	t|	  dd	|	 }|| jj}| ||\}}| |||}| ||}|	| jj}|  |! ||||d
}|
"| |	# }	q~~|r|d $|j|d< t%t&j'|
}
||
d< |
S )a  
        Args:
            evoformer_output_dict:
                Dictionary containing:
                    "single":
                        [*, N_res, C_s] single representation
                    "pair":
                        [*, N_res, N_res, C_z] pair representation
            aatype:
                [*, N_res] amino acid indices
            mask:
                Optional [*, N_res] sequence mask
        Returns:
            A dictionary of outputs
        singleNrn   r  r   Zquat)fmt)r  r  )Zrot_matsZquats)r'   r(   r)   r*   r+   r,   )(r   r[   r  r  r  r  rC  r  r  r   identityr\   rW   Ztrainingrh   r  
num_blocksr  r  r  r  Zcompose_q_update_vecr  r   get_rotsZget_rot_matsZ	get_transZscale_translationZtrans_scale_factorr  r%   r!   Zto_tensor_7Zto_tensor_4x4r  Zstop_rot_gradientr   r|   rB   r  )r   Zevoformer_output_dictr1   r   r  r   r  Zz_reference_listr  Zrigidsoutputsrj   Zbackb_to_globalr)   r*   Zall_frames_to_globalZpred_xyzZscaled_rigidspredsrD   rD   rE   r     st    




	

zEsmFoldStructureModule.forwardc                 C   s   t | ds*| jdtjtj||dddd t | dsR| jdtjtj|dddd t | ds|| jdtjtj||dddd t | ds| jdtjtj||dddd d S )	Ndefault_framesF)r\   rW   requires_grad)
persistent	group_idx)rW   r  	atom_masklit_positions)	hasattrregister_bufferrB   rq   r#   Z!restype_rigid_group_default_frameZrestype_atom14_to_rigid_groupZrestype_atom14_maskZ$restype_atom14_rigid_group_positions)r   Zfloat_dtyperW   rD   rD   rE   _init_residue_constants"  sV    


	


z.EsmFoldStructureModule._init_residue_constantsc                 C   s    |  |j|j t|||| jS rF   )r  r\   rW   r%   r  )r   rQ  alphafrD   rD   rE   r%   O  s    z/EsmFoldStructureModule.torsion_angles_to_framesc                 C   s2   |  | j| j t||| j| j| j| jS rF   )	r  r  r\   rW   r!   r  r  r  r  )r   rQ  r  rD   rD   rE   r!   U  s    zDEsmFoldStructureModule.frames_and_literature_positions_to_atom14_pos)NF)	r>   r?   r@   r   r   r  r%   r!   r   rD   rD   r   rE   r    s     
q-r  c                       s8   e Zd Z fddZdd Zdd Zedd Z  ZS )	EsmFoldingTrunkc                    s   t     | _ j} j}t | _t fddt	 j
D | _d| _t|| _t|| _t| j|| _| jjd    t j| _t| jj| _t| jj| _ j| _d S )Nc                    s   g | ]}t  qS rD   )r2  r  r  rD   rE   rU   l  s     z,EsmFoldingTrunk.__init__.<locals>.<listcomp>   r   )r   r   r  rW  r  r  pairwise_positional_embeddingr   r  rh   r  blocksrecycle_binsr   recycle_s_normrecycle_z_normr  recycle_distor   detachZzero_r  structure_modulerG  r  
trunk2sm_sr  
trunk2sm_zr   )r   r  r  r  r   r  rE   r   c  s    

 zEsmFoldingTrunk.__init__c                 C   s
   || _ d S rF   )r   )r   r   rD   rD   rE   set_chunk_sizez  s    zEsmFoldingTrunk.set_chunk_sizec              
      s  |j }|}|}	|dkr  jj}n|dk r0td|d7 } fdd}
|}|	}t|}t|}tj|jdd |tjd}t	|D ]}t
||d krg nt g  | |} | |}| | |7 }|
|| |	| ||\}}  | |d	|| }|}|}t|d
 d ddddddf dd j}W 5 Q R X q||d< ||d< |S )a~  
        Inputs:
          seq_feats: B x L x C tensor of sequence features pair_feats: B x L x L x C tensor of pair features residx: B
          x L long tensor giving the position in the sequence mask: B x L boolean tensor indicating valid residues

        Output:
          predicted_structure: B x L x (num_atoms_per_residue * 3) tensor wrapped in a Coordinates object
        Nr   z(Number of recycles must not be negative.r   c                    s>   | j ||d } jD ]}|| ||| jd\} }q| |fS )Nrk  )r   r6   r   )r  r  r   )r   r  residxr   blockr  rD   rE   
trunk_iter  s    
z+EsmFoldingTrunk.forward.<locals>.trunk_iterrn   ro  )r  r  r+   r   g      @g     `5@r-   r.   )rW   r  Zmax_recyclesr   rB   Z
zeros_liker   r[   Zint64rh   r   r   r  r  r   r  r  r  r  r  r-  r  	distogramr  )r   Z	seq_featsZ
pair_featsZtrue_aar  r   no_recyclesrW   s_s_0s_z_0r  r-   r.   Z	recycle_sZ	recycle_zr  Zrecycle_idx	structurerD   r  rE   r     sH    



  zEsmFoldingTrunk.forwardc                 C   s   t j|||d | jd}|d }dd | jdddD \}}}|| }|| }	|j|	d	d}
d
|
 d|  d|	  | }|dd d d d d f |dd d d d d f  djd	dd}t j||kd	d}|S )Nr   rV   r   c                 S   s   g | ]}| d qS )r   )rw  rQ   rD   rD   rE   rU     s     z-EsmFoldingTrunk.distogram.<locals>.<listcomp>r   r   rO   rn   gƠvl¥gO[I-?g:4M?.T)rP   Zkeepdims)rB   rq  rW   rL  crosspowr   )ZcoordsZmin_binmax_binZnum_binsZ
boundariesNCACr   r  r   ZCBdistsrt  rD   rD   rE   r    s    @zEsmFoldingTrunk.distogram)	r>   r?   r@   r   r  r   staticmethodr  r   rD   rD   r   rE   r  b  s
   Ar  a}  
    ESMForProteinFolding is the HuggingFace port of the original ESMFold model. It consists of an ESM-2 "stem" followed
    by a protein folding "head", although unlike most other output heads, this "head" is similar in size and runtime to
    the rest of the model combined! It outputs a dictionary containing predicted structural information about the input
    protein(s).
    c                
       s  e Zd ZddgZ fddZeee ej	dddZ
eedeeed	d!ej	eej	 eej	 eej	 ee edddZdd Zej	ej	dddZdd Ze d"eeee f dddZeeee dddZedddZee ee ddd Z  ZS )#EsmForProteinFoldingr  r2  c              
      s  t  | || _d| _t|dd| _| jd | jjjrF| j	  | jj
| _| jj| jj | _| jj| _| d| |j tt| jd | _| jjj}|j}|j}tt| jt| j|t t||| _t j!d | _"d| _#| j"d | _$| j"d | _%| jj&d	| _'| jj&d
| _(| jj&d| _)| jj&d| _*| jjj+rftj,| j"|dd| _-t.|| _t|| j| _/t|| j| _0t|| j"| _1d| _2|j3}tt|j4t|j4| jjj5t| jjj5| jjj5t| jjj5d| j2 | _6d S )N@   F)Zadd_pooling_layer
af2_to_esmr   r   r   r   z<cls>z<mask>z<eos><pad>)Zpadding_idxrn  %   )7r   r   r  distogram_binsr   esmZrequires_grad_esmfold_configZfp16_esmZhalfZhidden_size	esm_featsZnum_hidden_layersZnum_attention_headsZ	esm_attnsZ
esm_layersr  _af2_to_esm_from_vocab_list
vocab_listr   r   rB   r   esm_s_combinetrunkrW  r  ra  r   rG  rb  	esm_s_mlpr#   Zrestype_numZn_tokens_embedZpad_idxZunk_idxmask_idxindexesm_dict_cls_idxesm_dict_mask_idxesm_dict_eos_idxesm_dict_padding_idxembed_aar  r  r  distogram_headptm_headlm_head	lddt_binsr  r  Zlddt_head_hid_dimr7   )r   r  Ztrunk_configr  r  Zstructure_module_configr   rD   rE   r     sV    







zEsmForProteinFolding.__init__)r  rN   c                    s*     dg fddtjD  }t|S )Nr  c                    s   g | ]}  |qS rD   )r  )rR   r   r  rD   rE   rU   !  s     zDEsmForProteinFolding._af2_to_esm_from_vocab_list.<locals>.<listcomp>)r  r#   Zrestypes_with_xrB   rq   )r  Zesm_reorderrD   r  rE   r    s     z0EsmForProteinFolding._af2_to_esm_from_vocab_listzbatch_size, sequence_length)output_typeZconfig_classN)	input_idsattention_maskposition_idsmasking_patternnum_recyclesrN   c                 C   sR  | j j}|}|jd }|jd }	|j}
|dkr<tj||
d}|dkrXtj|	|
d|}| ||}|dk	r| 	||||\}}}n|}d}| 
|}|| jj}|jr|d }| }| jdd| d}| |}|||	|	|jj}| j jjr|| |7 }| j||||||d}dd | D }|rD||d	< | |d
 }||dd d }||d< | |d }||d< ||d< t| dD ]}||  |d9  < q||d< | |d |d jd ||	d| j }||d< t!|d | j d}||d< | "|d
 }||d< t#|d| j$d|d< |%t&|d| j$d t'f |S )a  
        Returns:

        Example:

        ```python
        >>> from transformers import AutoTokenizer, EsmForProteinFolding

        >>> model = EsmForProteinFolding.from_pretrained("facebook/esmfold_v1")
        >>> tokenizer = AutoTokenizer.from_pretrained("facebook/esmfold_v1")
        >>> inputs = tokenizer(["MLKNVQVQLV"], return_tensors="pt", add_special_tokens=False)  # A tiny random peptide
        >>> outputs = model(**inputs)
        >>> folded_positions = outputs.positions
        ```

        r   r   NrV   r   )r  c                 S   s   i | ]\}}|d kr||qS ))r.   r-   r'   r(   r)   r*   r+   r,   rD   rR   r_   r   rD   rD   rE   
<dictcomp>n  s    z0EsmForProteinFolding.forward.<locals>.<dictcomp>mlm_targetsr.   r/   r-   r0   r1   )r2   r5   rn   r6   r,   r7   r{  r8   r9      )r  Zno_binsr:   )(r  r  r[   rW   rB   Z	ones_likearangeZ	expand_asaf2_idx_to_esm_idx	bert_mask&compute_language_model_representationsr   r  r\   Zesm_ablate_sequencer  r   r   rw  r  r  r  r  r  r  ry   r  r   r  r"   r7   ro   r  r|  r  r    r  r  r   r&   )r   r  r	  r
  r  r  cfgaaBLrW   esmaaZ	masked_aar  esm_sr  r  r  Zdisto_logitsr0   r_   r7   r8   r9   rD   rD   rE   r   $  sb    



(zEsmForProteinFolding.forwardc                 C   s<   | j j|jkr| j |j| _ |d |dkd}| j | S Nr   r   )r  rW   r   rM  )r   r  r   rD   rD   rE   r    s    z'EsmForProteinFolding.af2_idx_to_esm_idx)r  rN   c                 C   s   t |  j}|j\}}| jjjrFtj||| j	j
d d| j|d}|S | j| j }}||df|}||df| j}	tj|||	gdd}||t||dkdf< | j||dkddd }
tj|
d	d}|d d ddf }|S )
Nr   rn   rV   r   rO   T)r	  Zoutput_hidden_statesZhidden_statesr   )next
parametersrW   r[   r  r  Z	bypass_lmrB   r   r  r   r  r  r  Znew_fullr   rY  rh   r   r  r  )r   r  rW   r  r  r  ZbosiZeosiZbosZeosZesm_hidden_statesrD   rD   rE   r    s    

 z;EsmForProteinFolding.compute_language_model_representationsc                 C   sJ   |  }|  }|  }| j||dk< d||dk< | j||dk< |||fS r  )r(  r  r  )r   r  r  r   patternZnew_aatargetZ	new_esmaarD   rD   rE   r    s    zEsmForProteinFolding.bert_mask)seqsc                    s   t |tkr|g}n|}t|  jtfdd|D  t fdd|D }|d krztj jd d	t
|dn|}|jdkr|d}| j ||dS )	Nc              	      s2   g | ]*}t tj|tjd d jddqS )T)sequencemappingZmap_unknown_to_xr   rO   )rB   Z
from_numpyr#   Zsequence_to_onehotZrestype_order_with_xr   ZargmaxrR   seqrV   rD   rE   rU     s   
z.EsmForProteinFolding.infer.<locals>.<listcomp>c                    s   g | ]}  t|qS rD   )r   ra   r#  )r1   rD   rE   rU     s     r   rV   rn   r   )r
  )rz   r   r  r  rW   rl   rB   r  r[   r}  ra   r   ru  r   r   )r   r   r
  rY   r   rD   )r1   rW   rE   infer  s*    

"

zEsmForProteinFolding.infer)r   rN   c           
      C   s   dd |   D } g }t| d d | }| d }t| d jd D ]X}| d | }|| }|| }| d | d	 }t||||| d
 | d}	|t|	 qB|S )zDReturns the pbd (file) string from the model given the model output.c                 S   s    i | ]\}}|| d  qS )r  )r   numpyr  rD   rD   rE   r    s      z6EsmForProteinFolding.output_to_pdb.<locals>.<dictcomp>r+   rn   r5   r1   r   r6   r   r8   )r1   Zatom_positionsr  r6   Z	b_factors)ry   r   rh   r[   r   r  r$   )
r   ZpdbsZfinal_atom_positionsZfinal_atom_maskrj   r  Zpred_posr   ZresidpredrD   rD   rE   output_to_pdb  s$    
z"EsmForProteinFolding.output_to_pdb)rN   c                 O   s0   t |tkst| j|f||}| |d S )EReturns the pdb (file) string from the model given an input sequence.r   )rz   r   rC  r%  r(  r   r   argskwargsr   rD   rD   rE   	infer_pdb		  s    zEsmForProteinFolding.infer_pdb)r   rN   c                 O   s   | j |f||}| |S )r)  )r%  r(  r*  rD   rD   rE   
infer_pdbs	  s    zEsmForProteinFolding.infer_pdbs)NNNN)N)r>   r?   r@   Z_no_split_modulesr   r  r   r   rB   rb   r  r   ESMFOLD_INPUTS_DOCSTRINGformatr   r&   r   r   r   r   r  r  r  r   r
   r%  r   r(  r-  r.  r   rD   rD   r   rE   r    s<   
7
    |	 'r  )r   )r   r   )rn   )rn  )br   r  dataclassesr   	functoolsr   typingr   r   r   r   r   r	   r
   r&  r   rB   Ztorch.nnr   r   Zintegrations.deepspeedr   Zmodeling_outputsr   rI   r   r   r   r   r   r   Zconfiguration_esmr   Zmodeling_esmr   r   r   Zopenfold_utilsr   r   r   r   r   r   r    r!   r"   r#   r$   r%   Z
get_loggerr>   r   Z_CHECKPOINT_FOR_DOCZ_CONFIG_FOR_DOCr&   r/  rG   rK   rb   r-  rl   r   rp   rv   r|   r   r   rG  r   Moduler   r   r   r   r   r   r  r.  rA  rP  rU  r]  r`  r2  rm  r|  r  r  r  r  r0  r  r  r  r  r  r  rD   rD   rD   rE   <module>   s   $ 8
P!
. n  K4>&h
$A F Ky	