U
    d~k                  	   @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
m  mZ d dlmZmZmZ d dlm  m  m  mZ d dlmZ eeZeej eeej e dddddd	d
gZ!dd Z"G dd de#Z$dd Z%dddZ&dd Z'edkre'  dS )    N)core	workspacedata_parallel_model)Seq2SeqModelHelperBatchencoder_inputsencoder_lengthsdecoder_inputsdecoder_lengthstargetstarget_weightsc              
   C   sv  dd | D }t |}g }t dd | D }g }g }g }g }| D ]\}	}
tjg|t|	  }|tt|	|  tjg|t|
  }tjg|
 }|t| |||  |
tjg }|| }|| t|	t|
 dkrdgt| }ndd |D }|| q@t	t
j|t
jd t
j|t
jdt
j|t
jd t
j|t
jdt
j|t
jd t
j|t
jd dS )Nc                 S   s   g | ]}t |d  qS )r   len.0entry r   F/tmp/pip-unpacked-wheel-ua33x9lu/caffe2/python/models/seq2seq/train.py
<listcomp>'   s     z!prepare_batch.<locals>.<listcomp>c                 S   s   g | ]}t |d  qS )   r   r   r   r   r   r   *   s     r   c                 S   s   g | ]}|t jkrd ndqS )r   r   )seq2seq_utilPAD_ID)r   targetr   r   r   r   G   s   )Zdtype)r   r   r	   r
   r   r   )maxr   r   r   appendlistreversedZGO_IDZEOS_IDr   nparrayZint32Z	transposeZfloat32)batchr   Zmax_encoder_lengthr
   Zmax_decoder_lengthZbatch_encoder_inputsZbatch_decoder_inputsZbatch_targetsZbatch_target_weightsZ
source_seqZ
target_seqZencoder_padsZdecoder_padsZtarget_seq_with_go_tokenZtarget_seq_with_eosr   r   r   r   r   prepare_batch&   s`    
r    c                   @   s   e Zd Zdd Zdd Zdd Zd*d	d
Zd+ddZdd Zdd Z	dd Z
dd Zdd Zdd Zd,ddZdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) ZdS )-Seq2SeqModelCaffe2c                    s   t |d} |  | t |d} |  |  jdkrt |}||  j|dd  | nh j j dkst	t
j|dd  jd tt jd  fdd	}t
j|d
d  j|tt jd  j|dd | _|j _d S )Ninit_paramsr   norm_clipped_grad_updatescopec                 S   s   d S Nr   mr   r   r   <lambda>       z1Seq2SeqModelCaffe2._build_model.<locals>.<lambda>)Zinput_builder_funZforward_pass_builder_funZparam_update_builder_funZdevicesc                    s    j | dd d S )Nr$   r%   )r$   )modelselfr   r   clipped_grad_update_bound   s    zBSeq2SeqModelCaffe2._build_model.<locals>.clipped_grad_update_boundc                 S   s   d S r'   r   r(   r   r   r   r*      r+   norm_clipped_sparse_grad_update)r   _build_shared_build_embeddingsnum_gpusmodel_build_funZAddGradientOperatorsr$   forward_model_build_fun
batch_sizeAssertionErrorr   ZParallelize_GPUr   ranger0   r,   netforward_net)r.   r#   r,   Zforward_modelZ
loss_blobsr/   r   r-   r   _build_modele   sH    








zSeq2SeqModelCaffe2._build_modelc              	   C   sn   | j d }tttjH |jdt|d dd| _|jdddd| _	|jdt

 dd| _W 5 Q R X d S )Noptimizer_paramslearning_rateF)nameZ
init_valueZ	trainableglobal_stepr   
start_time)model_paramsr   DeviceScopeDeviceOption
caffe2_pb2CPUZAddParamfloatr=   r?   timer@   )r.   r,   r<   r   r   r   r1      s"    

z Seq2SeqModelCaffe2._build_sharedc              	   C   s   t t tjx td}|jjg d| j	| j
d g| |d| _|j| j |jjg d| j| j
d g| |d| _|j| j W 5 Q R X d S )N   encoder_embeddingsencoder_embedding_size)shapeminr   decoder_embeddingsdecoder_embedding_size)r   rB   rC   rD   rE   mathsqrtparam_init_netZUniformFillsource_vocab_sizerA   rI   paramsr   target_vocab_sizerM   )r.   r,   Zsqrt3r   r   r   r2      s,    


z$Seq2SeqModelCaffe2._build_embeddingsFNc                 C   s  |j t d }|j t d }|j t d }|j t d }|j t d }|j t d }	| jd }
|
dksttj|| jt	| jd	 ||| j
| j| jd
 |
dk| jd
\}}}}}tj|| jd	 ||||||||| j| j| jd |
d| jd\}}tj|||| j| jd d}|j j|gddgdgd\}}|j j|	gddgdgd\}	}|j j|||	gddgdd\}}|j |	gd}|j ||gd}|j j|gdd| j d}|gS )Nr   r   r	   r
   r   r   	attention)noneregulardotdecoder_layer_configsrJ   rV   )
r,   encoder_paramsZnum_decoder_layersinputsinput_lengths
vocab_size
embeddingsembedding_sizeuse_attentionr3   rN   F)rY   r[   r\   r   encoder_outputsweighted_encoder_outputsfinal_encoder_hidden_statesfinal_encoder_cell_statesencoder_units_per_layerr]   r^   r_   attention_typeforward_onlyr3   decoder_softmax_size)r,   decoder_outputsdecoder_output_sizerT   rh   Ztargets_old_shape)rK   Ztarget_weights_old_shapeZOutputProbs_INVALIDloss_per_wordT)Z	only_loss	num_wordstotal_loss_scalartotal_loss_scalar_weighted      ?)Zscale)r9   ZAddExternalInputr   ZGetNameScoperA   r7   r   Zbuild_embedding_encoderrZ   r   rR   rI   r3   Zbuild_embedding_decoderrT   rM   Zoutput_projectionZReshapeZSoftmaxWithLossSumElementsMulZScaler6   )r.   r,   rg   
loss_scaler   r   r	   r
   r   r   rf   ra   rb   rc   rd   re   ri   rj   Zoutput_logits_rl   rm   rn   ro   r   r   r   r4      s    









z"Seq2SeqModelCaffe2.model_build_func                 C   s   | j |d|dS )NT)r,   rg   rs   )r4   )r.   r,   rs   r   r   r   r5   +  s
    z*Seq2SeqModelCaffe2.forward_model_build_func              
   C   s  t | g }t|D ]n\}}t| t|j| t jsF|j| n
|j| j}|j	
|gd|}	|j	|	d|}
||
 q|j	|d}|j	j|ddd}|jjg dg t| jd d	}|j	||gd
}|j	||gd}|W  5 Q R  S Q R X d S )Nzgrad_{}_squaredzgrad_{}_squared_sumgrad_squared_full_sumglobal_norm      ?)exponent	clip_normmax_gradient_normrK   valuemax_norm
norm_ratio)r   	NameScope	enumerateloggerinfo
isinstanceparam_to_gradGradientSlicevaluesr9   ZSqrformatrq   r   ZSumZPowrQ   ConstantFillrF   rA   ZMaxZDiv)r.   r,   rS   r&   ONEZgrad_squared_sumsiparamZgradZgrad_squaredZgrad_squared_sumru   rv   ry   r}   r~   r   r   r   _calc_norm_ratio2  sX    

z#Seq2SeqModelCaffe2._calc_norm_ratioc              
   C   s   |D ]}|j | }|j|gd}	t| |jj|	|gddd}
W 5 Q R X t|tjr||j}|j	|||j
||
g| q|j||||
g| qd S )NZnegative_learning_rateupdate_coeffr   )	broadcast)r   r9   ZNegativer   r   rr   r   r   r   ZScatterWeightedSumindicesZWeightedSum)r.   r~   r,   rS   r=   r&   r   r   Z
param_gradZnlrr   Zparam_grad_valuesr   r   r   _apply_norm_ratioa  s>    
z$Seq2SeqModelCaffe2._apply_norm_ratioc                 C   s   | j dkr| j}n|| jd}g }|jddD ]*}||jkr0t|j| tjs0|| q0|j	j
g ddgdd}td	 | ||||}| |||||| d S )
Nr   ZLRTZ	top_scoper   r   rp   r{   zDense trainable variables: )r3   r=   ZCopyCPUToGPU	GetParamsr   r   r   r   r   rQ   r   r   r   r   r   r.   r,   r&   r=   rS   r   r   r~   r   r   r   r$     s6    


     z+Seq2SeqModelCaffe2.norm_clipped_grad_updatec                 C   s   | j }g }|jddD ]*}||jkrt|j| tjr|| q|jjg ddgdd}t	
d | ||||}| |||||| d S )NTr   r   r   rp   r{   zSparse trainable variables: )r=   r   r   r   r   r   r   rQ   r   r   r   r   r   r   r   r   r   r0     s2    

     z2Seq2SeqModelCaffe2.norm_clipped_sparse_grad_updatec                 C   sL   | j dkrtdS d}t| j D ] }d|}t|}||7 }q"|S d S )Nr   rn   zgpu_{}/total_loss_scalar)r3   r   	FetchBlobr8   r   )r.   
total_lossr   r>   Zgpu_lossr   r   r   rn     s    




z$Seq2SeqModelCaffe2.total_loss_scalarc                 C   s0   t | jj dd }|| jj || j d S )Nc                 S   s   t j| dd | jD d d S )Nc                 S   s   g | ]}t |qS r   )str)r   r   r   r   r   r     s     zFSeq2SeqModelCaffe2._init_model.<locals>.create_net.<locals>.<listcomp>)Zinput_blobs)r   Z	CreateNetZexternal_inputs)r9   r   r   r   
create_net  s    z2Seq2SeqModelCaffe2._init_model.<locals>.create_net)r   Z
RunNetOncer,   rQ   r9   r:   )r.   r   r   r   r   _init_model  s    zSeq2SeqModelCaffe2._init_modelr   c              	   C   sZ   || _ d| _|d | _|| _|| _|| _|| _|d | _t	dddddt
| j g d S )	NZrnnencoder_typer6   Zcaffe2z--caffe2_log_level=0z--v=0z---caffe2_handle_executor_threads_exceptions=1z--caffe2_mkl_num_threads=)rA   r   rZ   rR   rT   r3   num_cpusr6   r   Z
GlobalInitr   )r.   rA   rR   rT   r3   r   r   r   r   __init__  s    

zSeq2SeqModelCaffe2.__init__c                 C   s   | S r'   r   r-   r   r   r   	__enter__  s    zSeq2SeqModelCaffe2.__enter__c                 C   s   t   d S r'   )r   ZResetWorkspace)r.   exc_type	exc_value	tracebackr   r   r   __exit__  s    zSeq2SeqModelCaffe2.__exit__c                 C   s,   t d | jdd |   t d d S )Nz3Initializing Seq2SeqModelCaffe2 from scratch: StartTr"   z4Initializing Seq2SeqModelCaffe2 from scratch: Finish)r   r   r;   r   r-   r   r   r   initialize_from_scratch  s    
z*Seq2SeqModelCaffe2.initialize_from_scratchc                 C   s   t | jd S )Nr   )r   r   r?   r-   r   r   r   get_current_step  s    z#Seq2SeqModelCaffe2.get_current_stepc                 C   s"   t | jt|  d g d S )Nr   )r   FeedBlobr?   r   r   r   r-   r   r   r   inc_current_step  s    z#Seq2SeqModelCaffe2.inc_current_stepc           
      C   s   | j dk r6t|}ttj|D ]\}}t|| qn~t| j D ]r}||d | j  }t|}ttj|D ]H\}}d||}|dkrt	
tj}	nt	
tj|}	tj|||	d qhq@|rt| j nt| jj |   |  S )Nr   z	gpu_{}/{})r   r	   )Zdevice_option)r3   r    zipr   _fieldsr   r   r8   r   r   rC   rD   rE   ZGpuDeviceTypeZRunNetr:   r,   r9   r   rn   )
r.   r   rg   Z	batch_objZbatch_obj_nameZbatch_obj_valuer   Z	gpu_batchr>   devr   r   r   step  s0    
zSeq2SeqModelCaffe2.stepc              
   C   s   d ||}ttjd| j g d|dds2ttj	
tj	|d}t|d.}|d| d	 | d
  td|  W 5 Q R X |S )Nz{0}-{1}ZSaveTZminidb)Zabsolute_pathdbZdb_type
checkpointwzmodel_checkpoint_path: "z"
all_model_checkpoint_paths: "z"
zSaved checkpoint file to )r   r   ZRunOperatorOncer   ZCreateOperatorr,   ZGetAllParamsr7   ospathjoindirnameopenwriter   r   )r.   Zcheckpoint_path_prefixZcurrent_stepZcheckpoint_pathZcheckpoint_config_pathZcheckpoint_config_filer   r   r   save'  s4    	

zSeq2SeqModelCaffe2.save)FN)N)r   r   )__name__
__module____qualname__r;   r1   r2   r4   r5   r   r   r$   r0   rn   r   r   r   r   r   r   r   r   r   r   r   r   r   r!   c   s*   4
e
/'  
#r!   c              
   C   s<  t | }t ||}g }t||D ]f\}	}
t|	|}t|
|}t|dkr"t|dkr"|d kszt||kr"t||kr"|||f q"W 5 Q R X W 5 Q R X |jdd d g g  }}|D ](}|| t||kr|| g }qt|dkr.t||k r||d  qt||ks$t|| t	| |S )Nr   c                 S   s   t | d t | d fS )Nr   r   r   )Zs_tr   r   r   r*   ^  r+   zgen_batches.<locals>.<lambda>)keyrk   )
r   r   r   Zget_numberized_sentencer   r   sortr7   randomshuffle)source_corpustarget_corpussource_vocabtarget_vocabr6   
max_lengthsourcer   Zparallel_sentencesZsource_sentenceZtarget_sentenceZnumerized_source_sentenceZnumerized_target_sentencebatchesr   Zsentence_pairr   r   r   gen_batchesC  sN    









r   c           
   	   C   sv  t | j| j}t | j| j}tdt| tdt| t	| j| j|||d | j
}tdt| t	| j| j|||d | j
}tdt| t|t|t|| jdd}|  t| jD ]}td| d	}|D ]}	||j|	d
d7 }qtd| d	}|D ]}	||j|	dd7 }q$td| | jd k	r|| j| qW 5 Q R X d S )NzSource vocab size {}zTarget vocab size {}r6   zNumber of training batches {}zNumber of eval batches {}   )rA   rR   rT   r3   r   zEpoch {}r   F)r   rg   z	training loss {}Tz	eval loss {})r   Z	gen_vocabr   Zunk_thresholdr   r   r   r   r   r   r   Zsource_corpus_evalZtarget_corpus_evalr!   r3   r   r8   Zepochsr   r   r   )
argsrA   r   r   r   Zbatches_evalZ	model_objr   r   r   r   r   r   run_seq2seq_modelo  sd     
  

r   c                  C   sB  t d tjdd} | jdtd ddd | jdtd d	dd | jd
td dd | jdtddd | jdtddd | jdtddd | jdtddd | jdtddd | jdtddd | jdd d!d" | jd#d d$d" | jd%td d&dd | jd'td d(dd | jd)td*d+d | jd,td-d.d | jd/td*d0d | jd1td-d2d | jd3td4d5d | jd6td*d7d | jd8td d9d | jd:td d;d |  }t	|j
d<g|j }|jr|j
d- dkst|d d=  d-  < t	|jd<g|j }t|t	|jrd>nd?|t	||jd@|jt	|jdA|j|j|j|jdB	dC d S )DNiz  zCaffe2: Seq2Seq Training)descriptionz--source-corpuszcPath to source corpus in a text file format. Each line in the file should contain a single sentenceT)typedefaulthelprequiredz--target-corpusz+Path to target corpus in a text file formatz--max-lengthz+Maximal lengths of train and eval sentences)r   r   r   z--unk-threshold2   zCThreshold frequency under which token becomes labeled unknown tokenz--batch-size    zTraining batch sizez--epochs
   z'Number of iterations over training dataz--learning-raterw   zLearning ratez--max-gradient-normrp   zbMax global norm of gradients at the end of each backward pass. We do clipping to match the number.z
--num-gpusr   z&Number of GPUs for data parallel modelz--use-bidirectional-encoder
store_truezJSet flag to use bidirectional recurrent network for first layer of encoder)actionr   z--use-attentionz,Set flag to use seq2seq with attention modelz--source-corpus-evalz:Path to source corpus for evaluation in a text file formatz--target-corpus-evalz:Path to target corpus for evaluation in a text file formatz--encoder-cell-num-unitsi   z&Number of cell units per encoder layerz--encoder-num-layers   zNumber encoder layersz--decoder-cell-num-unitsz)Number of cell units in the decoder layerz--decoder-num-layerszNumber decoder layersz--encoder-embedding-size   z&Size of embedding in the encoder layerz--decoder-embedding-sizez&Size of embedding in the decoder layerz--decoder-softmax-sizez$Size of softmax layer in the decoderz--checkpointzPath to checkpoint)	num_unitsr   rW   rV   )encoder_layer_configsuse_bidirectional_encoder)r=   )	rU   rY   r   r6   r<   rJ   rN   rh   rz   )rA   )r   seedargparseArgumentParseradd_argumentr   intrF   
parse_argsdictZencoder_cell_num_unitsZencoder_num_layersr   r7   Zdecoder_cell_num_unitsZdecoder_num_layersr   r`   r6   r=   rJ   rN   rh   rz   )parserr   r   rY   r   r   r   main  s    



















r   __main__)N)(r   collectionsloggingrO   Znumpyr   r   rG   sysr   Zcaffe2.proto.caffe2_pb2protorD   Zcaffe2.pythonr   r   r   Z)caffe2.python.models.seq2seq.seq2seq_utilpythonmodelsZseq2seqr   Z1caffe2.python.models.seq2seq.seq2seq_model_helperr   	getLoggerr   r   setLevelINFO
addHandlerStreamHandlerstderr
namedtupler   r    objectr!   r   r   r   r   r   r   r   <module>   sB   

=   c,
2_
