U
    d	O                     @   s   d Z ddlZddlmZ ddlm  mZ ddlmZm	Z	m
Z
mZ dZdZdZdZdZd	Zd
ZdZdd Zdd Zd#ddZd$ddZdd Zdd Zd%ddZG dd deZdd Zd&dd Zd!d" ZdS )'z? A bunch of util functions to build Seq2Seq models with Caffe2.    N)	viewitems)	attentioncorernn_cellbrewz<PAD>   z<GO>   z<EOS>   z<UNK>c              	      s   t  fdd t dd } t   t   t   t  t| 6}|D ]*}|  }|D ]}||  d7  < qbqNW 5 Q R X t	|D ]\}}||kr |  q S )Nc                      s   t  S N)len vocabr   M/tmp/pip-unpacked-wheel-ua33x9lu/caffe2/python/models/seq2seq/seq2seq_util.py<lambda>       zgen_vocab.<locals>.<lambda>c                   S   s   dS )Nr   r   r   r   r   r   r      r   r   )
collectionsdefaultdictPADGOEOSUNKopenstripsplitr   )ZcorpusZunk_thresholdZfreqsfsentencetokenstokenfreqr   r   r   	gen_vocab   s    

r    c                 C   s@   g }|    D ]*}||kr,|||  q||t  q|S r
   )r   r   appendr   )r   r   Znumerized_sentencer   r   r   r   get_numberized_sentence0   s    r"   c
              	   C   s   t |	2 | jjg d|gdd}
| jjg d|gdd}W 5 Q R X tj||dd|	rZ|	d ndd |d	}|d
krtd
nd| }|d
k	rtj|||	r|	d ndd |dd}g }|r|d |r|ddg |j	| ||||
f|d\}\}}}}|||fS )z Unidirectional LSTM encoder.initial_cell_state        shapevalueinitial_hidden_stateF/ Zlstm)
input_sizehidden_sizeforget_biasmemory_optimizationnameforward_onlyN      ?Zdropout)internal_celldropout_ratior/   r0   is_testr   r   r	   modelinputsseq_lengthsinitial_statesoutputs_with_grads)
r   	NameScopeparam_init_netConstantFillr   LSTMCellDropoutCellr!   extendapply_over_sequence)r6   r7   input_lengthsr+   	num_unitsdropout_keep_probr0   return_sequence_outputreturn_final_statescoper#   r(   cellr3   r:   outputs_final_hidden_statefinal_cell_stater   r   r   rnn_unidirectional_layer:   sZ    

	rM   c
                 C   sb  t | |||||||||	r |	d ndd d
\}
}}t|	 | j||gdg}W 5 Q R X t | |||||||||	r||	d ndd d
\}}}t|	 | j||gdg}W 5 Q R X |rt|	$ | jj|
|gdd	gd
d\}}W 5 Q R X nd }|rPt|	B | jj||gddgd
d\}}| jj||gddgd
d\}}W 5 Q R X nd }d }|||fS )Nr)   r*   fw)rG   reversed_inputsZbw
outputs_bwrI   Zoutputs_dimr   ZaxisrK   Zfinal_hidden_state_dimrL   Zfinal_cell_state_dim)rM   r   r;   netZReversePackedSegsZConcat)r6   r7   rB   r+   rC   rD   r0   rE   rF   rG   Z
outputs_fwZfinal_hidden_fwZfinal_cell_fwrO   rP   Zfinal_hidden_bwZfinal_cell_bwrI   rJ   rK   rL   r   r   r   rnn_bidirectional_layer|   sr    
rS   c                 C   s,   | j jg |||gdd}|s(| j| |S )Ng?)r&   Zstd)r<   ZGaussianFillparamsr!   )r6   
vocab_sizeembedding_sizer/   Zfreeze_embeddings
embeddingsr   r   r   build_embeddings   s    rX   c                 C   s    | r| d nd| }d ||S )Nr)   r*   z
{}/layer{})format)rG   Z
layer_typeiprefixr   r   r   get_layer_scope   s    r\   Fc           !      C   s~  t |p
db |	dkr.| j||gdg}n>t t tj | j||gdg}W 5 Q R X | |d}W 5 Q R X |}|}g }g }g }t	|d }|
dd}t|d D ]\}}|r|dkrt}d|d	  }nt}|d	 }|| ||d
 k}|
dd }||| k}|| ||||d	 ||
| p,||t|d|d
\}}}|sR|}|}|| || q|}d } || |||fS )Nr*   r   embedded_encoder_inputsembedded_encoder_inputs_cpuZencoder_layer_configsuse_bidirectional_encoderFr   rC   r   rD   encoder)
r6   r7   rB   r+   rC   rD   r0   rE   rF   rG   )r   r;   rR   GatherDeviceScopeDeviceOption
caffe2_pb2CPUCopyCPUToGPUr   get	enumeraterS   rM   r!   r\   )!r6   Zencoder_paramsnum_decoder_layersr7   rB   rU   rW   rV   use_attentionnum_gpusr0   rG   r]   r^   Zlayer_inputsZlayer_input_sizeencoder_units_per_layerfinal_encoder_hidden_statesfinal_encoder_cell_statesnum_encoder_layersr_   rZ   layer_configZ
layer_funcZoutput_dimsZis_final_layerrD   rF   Zlayer_outputsZfinal_layer_hidden_stateZfinal_layer_cell_stateencoder_outputsweighted_encoder_outputsr   r   r   build_embedding_encoder   s    



rs   c                   @   sV   e Zd Zdd Zdd ZdddZdd	 Zd
d Zdd Zdd Z	dd Z
dd ZdS )LSTMWithAttentionDecoderc                 C   s   | j d k	r| j d | S |S )Nr)   )r/   )selfr/   r   r   r   rG   B  s    zLSTMWithAttentionDecoder.scopec                 C   s4   |dkrt jjS |dkr t jjS ds0td| d S )NZregularZ	recurrentFzUnknown type )r   ZAttentionTypeZRegularZ	RecurrentAssertionError)ru   Zattention_type_as_stringr   r   r   _get_attention_typeE  s
    z,LSTMWithAttentionDecoder._get_attention_typeNc                 C   s   |
| _ t|| _|dkrHtj|| d|	d| _d| _|| _| jj	| _	njtj|| d|	d}tj
|||||| d| ||dd	| _d| _|| | _|j	| _	| j	d| j  d S )	Nnonedecoder)r/   residual_output_layersFattention_decoderT)	encoder_output_dimrq   encoder_lengthsdecoder_cellZdecoder_state_dimr/   attention_typerr   Zattention_memory_optimizationr   )r/   r   Z
num_layersr   ZMultiRNNCellrG   rH   rj   decoder_output_dimoutput_indicesZAttentionCellrw   r!   )ru   rq   r|   r}   rU   r   rV   decoder_num_unitsdecoder_cellsrz   r/   rr   r~   r   r   r   __init__M  s>    

z!LSTMWithAttentionDecoder.__init__c                 C   s
   | j  S r
   )rH   get_state_namesru   r   r   r   r   }  s    z(LSTMWithAttentionDecoder.get_state_namesc                 C   s   dd | j D S )Nc                 S   s   g | ]}d | qS )r   r   ).0rZ   r   r   r   
<listcomp>  s     zCLSTMWithAttentionDecoder.get_outputs_with_grads.<locals>.<listcomp>)r   r   r   r   r   get_outputs_with_grads  s    z/LSTMWithAttentionDecoder.get_outputs_with_gradsc                 C   s   | j S r
   )r   r   r   r   r   get_output_dim  s    z'LSTMWithAttentionDecoder.get_output_dimc                 C   s   | j s
t| j S r
   )rj   rv   rH   get_attention_weightsr   r   r   r   r     s    
z.LSTMWithAttentionDecoder.get_attention_weightsc                 C   s   | j j|||||dS )N)r6   input_tr8   statestimestep)rH   apply)ru   r6   r   r8   r   r   r   r   r   r     s    zLSTMWithAttentionDecoder.applyc                 C   s   | j j|||||  dS )Nr5   )rH   rA   r   )ru   r6   r7   r8   r9   r   r   r   rA     s    z,LSTMWithAttentionDecoder.apply_over_sequence)NNN)__name__
__module____qualname__rG   rw   r   r   r   r   r   r   rA   r   r   r   r   rt   @  s      
0rt   c              	   C   s  t |}t |}||kr"|| }nd}g }	t|D ]D\}
}|r^t ||
| kr^||
|  }nd }|d kr| jjg d|
|gdd}| j| n:|||
|  krtj| |d|
||
|  |dd}n|}|	| |rt ||
| kr||
|  }nd }|d kr2| jjg d|
|gdd}| j| n<|||
|  krjtj| |d|
||
|  |dd}n|}|	| q2|r| jjg d|d	 gdd}| j| |	| |	S )
Nr   zdecoder_initial_hidden_state_{}r$   r%   r   rQ   zdecoder_initial_cell_state_{}*initial_attention_weighted_encoder_context)	r   rh   r<   r=   rY   rT   r!   r   fc)r6   rl   decoder_units_per_layerrm   rn   rj   ro   ri   offsetr9   rZ   r   Zfinal_encoder_hidden_stateZdecoder_initial_hidden_stateZfinal_encoder_cell_stateZdecoder_initial_cell_stater   r   r   r    build_initial_rnn_decoder_states  s    

	


	
r   c           "      C   s  t |p
db |dkr.| j||gdg}n>t t tj | j||gdg}W 5 Q R X | |d}W 5 Q R X g }g }t	|D ]\}}|d }|
| |dkr|}n|d  }tj|||ddd	}|d
d }|d k	r
d|j }tj|||dt|d|d}|
| qt| |	||||dkd}t||	d ||
|||d |||d
}|j| |||d\}}| jj|gddgd| gd\} }| }| }!||!fS )Nr*   r   embedded_decoder_inputsembedded_decoder_inputs_cpurC   r   r$   F)r0   r+   r,   r-   r.   rD   r1   Zdecoder_dropout)r2   r3   r0   r4   r/   rx   )r6   rl   r   rm   rn   rj   )
rq   r|   r}   rU   r   rV   r   r   rr   r/   )r6   r7   r8   r9   decoder_outputs_flattenedZ2decoder_outputs_and_contexts_combination_old_shaper&   )r   r;   rR   ra   rb   rc   rd   re   rf   rh   r!   r   r   r>   rg   rD   r?   r\   r   rt   rA   ZReshape)"r6   Zdecoder_layer_configsr7   rB   r}   rq   rr   rm   rn   rl   rU   rW   rV   r   r0   rk   rG   r   r   r   r   rZ   rp   rC   r+   rH   rD   r3   r   r{   decoder_outputsrJ   r   r   r   r   r   build_embedding_decoder  s    







	r   c                 C   st   |d k	r t j| |d||d}|}| jjg d||gd}| jjg d|gd}| j||g | j|||gdg}|S )NZdecoder_outputs_scaled)Zdim_inZdim_outoutput_projection_wr   output_projection_boutput_logits)r   r   r<   Z
XavierFillrT   r@   rR   ZFC)r6   r   Zdecoder_output_sizeZtarget_vocab_sizeZdecoder_softmax_sizer   r   r   r   r   r   output_projectionx  s>    r   )N)N)r   FN)r   N) __doc__r   Zfuture.utilsr   Zcaffe2.proto.caffe2_pb2protord   Zcaffe2.pythonr   r   r   r   ZPAD_IDr   ZGO_IDr   ZEOS_IDr   ZUNK_IDr   r    r"   rM   rS   rX   r\   rs   objectrt   r   r   r   r   r   r   r   <module>   s<    
L 
M   
`lk  
q