U
    dj                     @   sz  d dl Z d dlZd dlZd dlZd dlZd dlmZmZm	Z	m
Z
 d dlmZmZ d dlmZmZmZ d dlmZ d dlm  m  mZ d dlm  m  mZ d dlmZmZ d dlm  m  mZ d dlm  m  m Z! d dl"m#Z# e$  e%dZ&e&'ej( e)d	 e)d
 dddZ*dd Z+dd Z,dd Z-dd Z.dd Z/dd Z0e1dkrve2ddg e0  dS )    N)core	workspaceexperiment_utildata_parallel_model)dyndep	optimizer)timeout_guardmodel_helperbrew)
caffe2_pb2)InitializerPseudoFP16Initializer)predictor_constantsZImagenet_trainerz2@/caffe2/caffe2/distributed:file_store_handler_opsz3@/caffe2/caffe2/distributed:redis_store_handler_opsc           
      C   sP   t j| |ddg||t| jr"dndd||ddd|d|d\}}	| ||}d	S )
z
    The image input operator loads image and label data from the reader and
    applies transformations to the images (random cropping, mirroring, ...).
    datalabelTFg      `@      )
batch_sizeoutput_typeZuse_gpu_transformZuse_caffe_datummean_per_channelstd_per_channelZmeanZstdscaleZcropZmirroris_testN)r
   Zimage_inputr   ZIsGPUDeviceTypeZ_device_typeZStopGradient)
modelreaderr   img_sizedtyper   r   r   r   r    r   K/tmp/pip-unpacked-wheel-ua33x9lu/caffe2/python/examples/imagenet_trainer.pyAddImageInput/   s$     
r   c                 C   sl   |dkrdnd}| j jg d| g|d||gd |dkrJ| j d| d | j jg dg|gdtjjd	 d
S )z
    The null input function uses a gaussian fill operator to emulate real image
    input. A label blob is hardcoded to a single value. This is useful if you
    want to test compute throughput or don't have a dataset available.
    float16Z_fp16 r      )shaper   r   )r#   valuer   N)param_init_netZGaussianFillZFloatToHalfZConstantFillr   DataTypeZINT32)r   r   r   r   r   suffixr   r   r   AddNullInputR   s    
r(   c                 C   s   d |j|jd }tj|j t||d g|d g|d d| j	f|d | j
| j| jfid}d| j| j|f }tjd|||d	 d S )
Nz[]_{}r   z/dataz/softmaxr   )Zpredict_net
parametersinputsoutputsZshapesz%s/%s_%d.mdlminidb)db_typeZdb_destinationpredictor_export_meta	use_ideep)format_device_prefix_devicespred_expZPredictorExportMetanetProtor   ZGetCheckpointParams
num_labelsnum_channels
image_sizefile_store_pathsave_model_nameZ
save_to_db)argstrain_modelepochr/   prefixr.   
model_pathr   r   r   	SaveModelj   s.      	r@   c                 C   s   t d|  t| d}tt|t	j
}tt|t	j}|rR|  n|  |rh|  n|  t|s~tt|sttd}tjd|ttjdd dS )z)
    Load pretrained model from file
    zLoading path: {}r,   Zoptimizer_iterationr   )Zdevice_optionN)loginfor0   r3   Zload_from_dbr   ZNet
pred_utilsZGetNetr   ZGLOBAL_INIT_NET_TYPEZPREDICT_INIT_NET_TYPEZRunAllOnIDEEPZRunAllOnGPUr   
RunNetOnceAssertionError	FetchBlobZFeedBlobZDeviceOptionr   ZCPU)pathr   r/   Zmeta_net_defZinit_netZpredict_init_netZitercntr   r   r   	LoadModel   s0      


rH   c                 C   s>  t d|| j t| j| | }t| j| | }	t|D ]}
|
dkrR| jn| j	}t
|0 t }t|j j t }|| }W 5 Q R X d}t ||
d ||||  d|j|jd }t|d }t|d }d}t ||| q@|| | }d|j|jd }t|d }t|d }tt|d }d}d}|d	k	rd}t|	D ]r}t|j j |jD ]R}|ttd|j|d 7 }|ttd|j|d
 7 }|d7 }qqp|| }|| }nd}d}|j ||
||  ||||||dd |dk s6td|d S )zI
    Run one epoch of the trainer.
    TODO: add checkpointing here.
    zStarting epoch {}/{}r   z8Finished iteration {}/{} of epoch {} ({:.2f} images/sec)r   z{}_{}z	/accuracyz/losszTraining loss: {}, accuracy: {}Nz/accuracy_top5)accuracylosslearning_rater=   Ztop1_test_accuracyZtop5_test_accuracy)Zinput_countZbatch_countZadditional_values(   zExploded gradients :()rA   rB   r0   
num_epochsint
epoch_sizeZtest_epoch_sizerangeZfirst_iter_timeouttimeoutr   ZCompleteInTimeOrDietimer   ZRunNetr4   r5   namer1   r2   rF   r   ZGetLearningRateBlobNamesnpZasscalarrE   )r;   r=   r<   
test_modeltotal_batch_size
num_shardsexpnameexplogepoch_itersZtest_epoch_itersirR   t1t2dtfmtr>   rJ   rK   Z	train_fmtZ
num_imagesrL   Ztest_accuracyZtest_accuracy_top5Zntests_gr   r   r   RunEpoch   st    





rc   c                    s   j dkrdt j }n j dkr(d} jd k	rRdd  jdD }t|}ntt j} j}t	
d|  j| | dkstd jrt j jkstd	 jrt j jkstd
 j }t j| }|dkstd||  _t	
d j  jr.dddd}nddd jd d d}tj||d} j j} jd}	tdd k	rttddttdd}dkrHtd |d j|	d dd d}
ndkrDd} jd k	rt t!j"dg |g j j# j$d n t t!j"dg |g j% j$d t||d j|	d d d}
nd }
 fdd } fd!d"} fd#d$} j&d%kr fd&d'}n(|j'd( j& j(|d) fd*d'}d+d, }t)j*|| j dkr|n|||||
d j+ j, j j, j,d- t)-|i t. d t/|j0 t1|j2 d } j3d k	rt	
d.  jrRddd/}ndddd0}tj|d1 |dd2}|j'd3 j3 j(d4 fd5d6}t)j*|| j dkr|n||d | j+ j,d7 t/|j0 t1|j2 d} j4d k	rNt5 j4| j t)6|  j4d8d9 }|7d:rDt|d d; }t	
d<| n
t	8d= d>| j j9 j:f }t;<| }| j=k rt> |||||}t? || j d? j% j@f }tjAB|t|d  d: rrtC|t|d  d:  qrd S )@Nresnext
shufflenetc                 S   s   g | ]}t |qS r   )rO   ).0xr   r   r   
<listcomp>  s     zTrain.<locals>.<listcomp>,zRunning on GPUs: {}r   z%Number of GPUs must divide batch sizez8The number of channels of image mean doesn't match inputz7The number of channels of image std doesn't match inputz;Epoch size must be larger than batch size times shard countzUsing epoch size: {}Fr   )	use_cudnncudnn_exhaustive_searchZtraining_modeZNCHWTi   )orderrj   rk   Zws_nbytes_limit)rT   	arg_scopeZOMPI_COMM_WORLD_SIZEZOMPI_COMM_WORLD_RANKZGLOO)
kv_handlerrX   shard_idengine	transport	interfaceZmpi_rendezvous	exit_netsstore_handlerZRedisStoreHandlerCreate)hostportr>   ZFileStoreHandlerCreate)rG   r>   )rn   ro   rX   rp   rq   rr   rs   c                    s    j dkrtnt}tjtjtjg|| j jd, t	j
| d j j j j jddd	}W 5 Q R X  j dkr| j||d }| |dgdd	g\}}| j||d
}tj| |dgddd tj| |dgddd |gS )Nr    ZWeightInitializerZBiasInitializerenable_tensor_corefloat16_computer   T)num_input_channelsr6   
num_layersZ
num_groupsZnum_width_per_groupZno_biasno_loss_fp32r   softmaxrK   r   rJ   r   Ztop_kaccuracy_top5   )r   r   r   r
   rm   convfcrx   ry   resnetZcreate_resnextr7   r6   r{   Zresnext_num_groupsZresnext_width_per_groupr4   HalfToFloatSoftmaxWithLossScalerJ   r   Z
loss_scaleZinitializerpredr~   rK   r;   r   r   create_resnext_model_opsw  s:    

z'Train.<locals>.create_resnext_model_opsc              	      s    j dkrtnt}tjtjtjg|| j jd t	j
| d j jdd}W 5 Q R X  j dkrr| j||d }| |dgdd	g\}}| j||d
}tj| |dgddd tj| |dgddd |gS )Nr    rw   r   T)rz   r6   r|   r}   r   r~   rK   r   rJ   r   r   r   r   )r   r   r   r
   rm   r   r   rx   ry   re   Zcreate_shufflenetr7   r6   r4   r   r   r   rJ   r   r   r   r   create_shufflenet_model_ops  s2    

z*Train.<locals>.create_shufflenet_model_opsc              
      sh   t d j   } jr<tj|  jdd jd|dd}n(t|  j tj|  jddd|dd}|S )N   g?r   step皙?)momentumnesterovweight_decaypolicystepsizegamma)r   r   r   r   r   )	rO   rP   ry   r   Zbuild_fp16_sgdbase_learning_rater   Zadd_weight_decayZbuild_multi_precision_sgd)r   Zstepszopt)r;   rX   rW   r   r   add_optimizer  s.    	zTrain.<locals>.add_optimizernullc                    s   t | d  j jd d S )N)r   r   r   )r(   r8   r   r   )r;   batch_per_devicer   r   add_image_input  s    zTrain.<locals>.add_image_inputr   )dbr-   rX   ro   c              
      s$   t |  j jd j jd d S )NFr   r   r   r   r   r   r   r8   r   image_mean_per_channelimage_std_per_channelr   )r;   r   r   r   r   r     s    c                 S   s<   |  |  D ](}|jdk	r| j|j|jtjj  qdS )z-Add ops applied after initial parameter sync.N)	ZGetOptimizationParamInfoZ	GetParamsZ	blob_copyr%   r   Zblobr   r&   FLOAT)r   Z
param_infor   r   r   add_post_sync_ops  s    
z Train.<locals>.add_post_sync_ops)input_builder_funforward_pass_builder_funZoptimizer_builder_funpost_sync_builder_fundevices
rendezvousZoptimize_gradient_memoryuse_nccl
cpu_deviceZideepZshared_modelZcombine_spatial_bnz----- Create test net ----)rj   rk   )rl   rj   rk   _test)rT   rm   Zinit_paramstest_reader)r   r-   c              
      s$   t |  j jd j jd d S )NTr   r   r   )r;   r   r   r   r   test_input_fn(  s    zTrain.<locals>.test_input_fn)r   r   r   Zparam_update_builder_funr   r   r   ra   rI   z.mdlzReset epoch to {}z,The format of load_model_path doesn't match!z%s_gpu%d_b%d_L%d_lr%.2f_v2z%s/%s_)Dr   strr{   gpussplitlenlistrQ   num_gpusrA   rB   r0   r   rE   r   r7   r   rX   rO   rP   r/   Zcudnn_workspace_limit_mbr	   ZModelHelperro   Zdistributed_interfacesosgetenvdictZdistributed_transportZ
redis_hostr   ZRunOperatorOncer   ZCreateOperatorZ
redis_portZrun_idr9   Z
train_dataZCreateDBr-   r   ZParallelizer   Zuse_cpuZOptimizeGradientMemorysetrD   r%   Z	CreateNetr4   Z	test_dataZload_model_pathrH   ZFinalizeAfterCheckpointendswithwarningr6   r   r   ZModelTrainerLogrN   rc   r@   r:   rG   isfileremove)r;   Z
model_namer   r   Zglobal_batch_sizer[   Ztrain_arg_scoper<   ro   Z
interfacesr   rt   r   r   r   r   r   rV   Ztest_arg_scoper   r=   Zlast_strrY   rZ   r?   r   )r;   r   rX   r   r   rW   r   Train   s   






 


  
  
		


r   c                  C   s  t jdd} | jdtd ddd | jdtdd	d
 | jdtddd
 | jdtddd
 | jdtd dd
 | jdtddd | jdtddd | jdtddd
 | jdtddd
 | jdtdd  | jd!tdd"d
 | jd#td$d%d
 | jd&td'd(d
 | jd)td*d+d
 | jd,td-d.d
 | jd/td0d1d
 | jd2td*d3d
 | jd4td5d6d
 | jd7td8d9d
 | jd:tdd;d
 | jd<tdd=d
 | jd>td?d@d
 | jdAtdBd  | jdCtdDd  | jdEtdFdGd
 | jdHtdIdJd
 | jdKtdLdMd
 | jdNtd dOd
 | jdPdQdRdS | jdTdQdUdS | jdVtdWdXd
 | jdYdZdZd[gd\d] | jd^dQd_dS | jd`dQdadS | jdbtdcddd
 | jdetdfdgd
 | jdhtdidjd
 | jdktdldmd
 | jdndododpdqdogdrds |  }t| d S )tNzCaffe2: ImageNet Trainer)descriptionz--train_dataTz-Path to training data (or 'null' to simulate))typedefaultrequiredhelpz--num_layers2   z'The number of layers in ResNe(X)t model)r   r   r   z--resnext_num_groupsr   zThe cardinality of resnextz--resnext_width_per_group@   z--test_datazPath to test dataz--image_mean_per_channel+z#The per channel mean for the images)r   nargsr   z--image_std_per_channelz1The per channel standard deviation for the imagesz--test_epoch_sizeiP  zNumber of test imagesz	--db_typeZlmdbz'Database type (such as lmdb or leveldb)z--gpusz*Comma separated list of GPU devices to use)r   r   z
--num_gpusz)Number of GPU devices (instead of --gpus)z--num_channelsr"   zNumber of color channelsz--image_size   zInput image size (to crop to)z--num_labelsi  zNumber of labelsz--batch_size    zBatch size, total over all GPUsz--epoch_sizei` z/Number of images/epoch, total over all machinesz--num_epochszNum epochs.z--base_learning_rater   zInitial learning rate.z--weight_decayg-C6?z Weight decay (L2 regularization)z--cudnn_workspace_limit_mbzCuDNN workspace limit in MBsz--num_shardsz%Number of machines in distributed runz
--shard_idr   z	Shard id.z--run_idz!Unique run identifier (e.g. uuid)z--redis_hostz%Host of Redis server (for rendezvous)z--redis_porti  z%Port of Redis server (for rendezvous)z--file_store_pathz/tmpz'Path to directory to use for rendezvousz--save_model_nameZresnext_modelz&Save the trained model to a given namez--load_model_pathz0Load previously saved model to continue trainingz	--use_cpu
store_truezUse CPU instead of GPU)actionr   z
--use_ncclz"Use nccl for inter-GPU collectivesz--use_ideepFz	Use ideepz--dtypefloatr    zData type used for training)r   choicesr   z--float16_computez"Use float 16 compute, if availablez--enable_tensor_corez+Enable Tensor Core math for Conv and FC opsz--distributed_transportZtcpz2Transport to use for distributed run [tcp|ibverbs]z--distributed_interfacesr!   z-Network interfaces to use for distributed runz--first_iter_timeouti  z<Timeout (secs) of the first iteration (default: %(default)s)z	--timeout<   zJTimeout (secs) of each (except the first) iteration (default: %(default)s)z--modelrd   ?re   zList of models which can be run)r   constr   r   r   )	argparseArgumentParseradd_argumentr   rO   r   bool
parse_argsr   )parserr;   r   r   r   mainv  s    




























  r   __main__Zcaffe2z--caffe2_log_level=2)NN)3r   loggingZnumpyrU   rS   r   Zcaffe2.pythonr   r   r   r   r   r   r   r	   r
   Zcaffe2.protor   Zcaffe2.python.models.resnetpythonmodelsr   Zcaffe2.python.models.shufflenetre   Z#caffe2.python.modeling.initializersr   r   Z*caffe2.python.predictor.predictor_exporterZ	predictorZpredictor_exporterr3   Z*caffe2.python.predictor.predictor_py_utilsZpredictor_py_utilsrC   Z!caffe2.python.predictor_constantsr   basicConfig	getLoggerrA   setLevelDEBUGZInitOpsLibraryr   r(   r@   rH   rc   r   r   __name__Z
GlobalInitr   r   r   r   <module>   sB   



  
# T  {^
