U
    d                     @   s   d dl Z d dlmZ d dlmZmZ d dlZd dl	Z	d dl
Z
d dlmZ d dlmZmZmZmZ d dlm  mZ ejd ed dd ZdddZe ej dG dd dejZdS )    N)givenassume)
caffe2_pb2)core	workspacemujidyndep   z%@/caffe2/caffe2/contrib/nccl:nccl_opsc                 C   s   t  }tj|_| |_|S N)r   ZDeviceOptionr   ZGpuDeviceTypeZdevice_typeZ	device_id)idevice_option r   E/tmp/pip-unpacked-wheel-ua33x9lu/caffe2/contrib/nccl/nccl_ops_test.py
gpu_device   s    r      d   c                 C   sv   t |D ]}| | qtd}|td|| t }| | t }td|| t	| d  || S )Nplanz	test-stepz2Timing network, time taken per-iteration: {:.6f}msg     @@)
rangerunr   ZPlanZAddStepZExecutionSteptimeprintformatfloat)wsnetZwarmupsiters_r   beforeafterr   r   r   	benchmark   s    

 r   zNCCL only on CUDA GPUc                	   @   sx  e Zd Zeejde dejddde ddd Z	eejde dejdddejde d dd	d
d Z
eejde dejdddejddde ddd Zeejde dejdddddd Zeejde dejdddddd Zeejde dejdddejdddedddgddd Zeejd ddd Zd S )!NCCLOpsTest   )Z	min_valueZ	max_valuer	   i  )nmin_placec           
         s    fddt D dd t D }|r0dndfddt D }td||}dd	 t|D }fd
d}| tj|fddt|D ||}|D ].}	tj	|d |	 | 
|d  |	  qd S )Nc                    s    g | ]}t j t jqS r   nprandomZrandnZastypefloat32.0r   r#   r   r   
<listcomp>/   s     z3NCCLOpsTest.test_nccl_allreduce.<locals>.<listcomp>c                 S   s   g | ]}t d |qS zx_{}strr   r)   r   r   r   r,   0   s      oc                    s   g | ]}t d  |qS z{}x_{}r.   r)   prefixr   r   r,   2   s     NCCLAllreducec                 S   s   i | ]\}}|t |qS r   r   r*   r   r"   r   r   r   
<dictcomp>4   s      z3NCCLOpsTest.test_nccl_allreduce.<locals>.<dictcomp>c                     s4   t | ksttj| dd  fddtD S )Nr   Zaxisc                    s   g | ]} qS r   r   r*   r   outputr   r   r,   9   s     zFNCCLOpsTest.test_nccl_allreduce.<locals>.allreduce.<locals>.<listcomp>)lenAssertionErrorr&   sumr   argsr"   r;   r   	allreduce6   s    z2NCCLOpsTest.test_nccl_allreduce.<locals>.allreducec                    s   g | ]\}} | qS r   r   r*   r   r   xsr   r   r,   <   s     r   r   r   CreateOperator	enumerateassertReferenceCheckshugpu_dor&   testingassert_array_equalassertEqualtobytes)
selfr"   r#   r$   inputsoutputsopinput_device_optionsrC   r<   r   )r#   r"   r4   rF   r   test_nccl_allreduce+   s"       zNCCLOpsTest.test_nccl_allreducer   )r"   r#   rootc                    s   t k   fddtD dd tD }tjd||d}dd t|D }fdd	}| tj|fd
dt|D || d S )Nc                    s    g | ]}t j t jqS r   r%   r)   r+   r   r   r,   H   s     z3NCCLOpsTest.test_nccl_broadcast.<locals>.<listcomp>c                 S   s   g | ]}t d |qS r-   r.   r)   r   r   r   r,   I   s     ZNCCLBroadcastrW   c                 S   s   i | ]\}}|t |qS r   r6   r7   r   r   r   r8   K   s      z3NCCLOpsTest.test_nccl_broadcast.<locals>.<dictcomp>c                     s(   t  kst fddtD S )Nc                    s   g | ]}  qS r   r   r:   )rA   rW   r   r   r,   O   s     zFNCCLOpsTest.test_nccl_broadcast.<locals>.broadcast.<locals>.<listcomp>r=   r>   r   r@   )r"   rW   r@   r   	broadcastM   s    z2NCCLOpsTest.test_nccl_broadcast.<locals>.broadcastc                    s   g | ]\}} | qS r   r   rD   rE   r   r   r,   R   s     r   r   r   rH   rI   rJ   rK   rL   )rQ   r"   r#   rW   rR   rT   rU   rZ   r   )r#   r"   rW   rF   r   test_nccl_broadcastB   s       zNCCLOpsTest.test_nccl_broadcast)r"   r#   rW   r$   c           	         s   t |dkp|dk  fddtD dd tD }tjd||rP|| nd|d}d	d
 t|D }fdd}| tj|fddt|D || d S )NFr   c                    s    g | ]}t j t jqS r   r%   r)   r+   r   r   r,   \   s     z0NCCLOpsTest.test_nccl_reduce.<locals>.<listcomp>c                 S   s   g | ]}t d |qS r-   r.   r)   r   r   r   r,   ]   s     
NCCLReduce   orX   c                 S   s   i | ]\}}|t |qS r   r6   r7   r   r   r   r8   a   s      z0NCCLOpsTest.test_nccl_reduce.<locals>.<dictcomp>c                     s    t |  ksttj| ddgS )Nr   r9   )r=   r>   r&   r?   r@   rB   r   r   reducec   s    z,NCCLOpsTest.test_nccl_reduce.<locals>.reducec                    s   g | ]\}} | qS r   r   rD   rE   r   r   r,   h   s     r[   )	rQ   r"   r#   rW   r$   rR   rT   rU   r_   r   r#   r"   rF   r   test_nccl_reduceU   s$         zNCCLOpsTest.test_nccl_reduce)r"   r#   c           	         s    fddt D dd t D }dd t D }td||}dd t|D }fdd	}| tj|fd
dt|D ||}|D ].}tj	|d | | 
|d  |  qd S )Nc                    s    g | ]}t j t jqS r   r%   r)   r+   r   r   r,   n   s     z3NCCLOpsTest.test_nccl_allgather.<locals>.<listcomp>c                 S   s   g | ]}t d |qS r-   r.   r)   r   r   r   r,   o   s     c                 S   s   g | ]}t d |qS zo_{}r.   r)   r   r   r   r,   p   s     ZNCCLAllGatherc                 S   s   i | ]\}}|t |qS r   r6   r7   r   r   r   r8   r   s      z3NCCLOpsTest.test_nccl_allgather.<locals>.<dictcomp>c                     s&   t  kst fddtD S )Nc                    s   g | ]}t j d dqS )r   r9   )r&   stackr:   r@   r   r   r,   v   s     zFNCCLOpsTest.test_nccl_allgather.<locals>.allgather.<locals>.<listcomp>rY   r@   rB   r@   r   	allgathert   s    z2NCCLOpsTest.test_nccl_allgather.<locals>.allgatherc                    s   g | ]\}} | qS r   r   rD   rE   r   r   r,   y   s     r   rG   )	rQ   r"   r#   rR   rS   rT   rU   rd   r<   r   r`   r   test_nccl_allgatherk   s        zNCCLOpsTest.test_nccl_allgatherc                    s    fddt D dd t D }dd t D }td||}dd t|D }fdd	}| tj|fd
dt|D || d S )Nc                    s"   g | ]}t j t jqS r   r%   r)   )r#   r"   r   r   r,      s     z8NCCLOpsTest.test_nccl_reduce_scatter.<locals>.<listcomp>c                 S   s   g | ]}t d |qS r-   r.   r)   r   r   r   r,      s     c                 S   s   g | ]}t d |qS rb   r.   r)   r   r   r   r,      s     ZNCCLReduceScatterc                 S   s   i | ]\}}|t |qS r   r6   r7   r   r   r   r8      s      z8NCCLOpsTest.test_nccl_reduce_scatter.<locals>.<dictcomp>c                     sD   t | kstt|  t  jdks*t fddtD }|S )Nr	   c                    s   g | ]} |d d f qS r
   r   r)   Zreducedr   r   r,      s     zPNCCLOpsTest.test_nccl_reduce_scatter.<locals>.reduce_scatter.<locals>.<listcomp>)r=   r>   r?   shaper   )rA   refrB   rf   r   reduce_scatter   s
    z<NCCLOpsTest.test_nccl_reduce_scatter.<locals>.reduce_scatterc                    s   g | ]\}} | qS r   r   rD   rE   r   r   r,      s     )r   r   rH   rI   rJ   rK   rL   )rQ   r"   r#   rR   rS   rT   rU   ri   r   r`   r   test_nccl_reduce_scatter   s       z$NCCLOpsTest.test_nccl_reduce_scatteri r   ZdagZ	async_dagsimple)r"   r#   r   net_typec           
      C   s  dd t |D }dd t |D }td}|| _|| _t |D ]t}|jg || |gdt|d |jg || |gdt|d t |D ]*}	|j|| || g|| gt|d qqJ|j	||d	 gtd	d | j
| tj| j
j|d	   tj|f|| tjd
 d S )Nc                 S   s   g | ]}t d |qS r-   r.   r)   r   r   r   r,      s     z/NCCLOpsTest._test_nccl_sync.<locals>.<listcomp>c                 S   s   g | ]}t d |qS )zxe_{}r.   r)   r   r   r   r,      s     Zasdfg        )rg   valuer   g      ?)r   r   )rg   Z
fill_valueZdtype)r   r   NetZPrototypeZnum_workersZConstantFillr   ZSumr]   r   r   r&   rM   rN   Zblobsfetchfullr(   )
rQ   r"   r#   r   rl   rR   Zextra_inputsr   r   r   r   r   r   _test_nccl_sync   s,    



zNCCLOpsTest._test_nccl_syncZCAFFE2_BENCHMARKZ	Benchmarkc                    s   t dt D ]}dD ]}dd t |D }dd t |D }|rFdnd  fddt |D }td	}||| |  t |D ]$}| j|| 	|| t
| q| j| t| j|}td
}	t|	| t| j|	}
td|
|  qqd S )Nr!   )FTc                 S   s    g | ]}t jd t jqS )g    cAr%   r)   r   r   r   r,      s   z,NCCLOpsTest.test_timings.<locals>.<listcomp>c                 S   s   g | ]}t d |qS r-   r.   r)   r   r   r   r,      s     r0   r1   c                    s   g | ]}t d  |qS r2   r.   r)   r3   r   r   r,      s     testvanillazSpeedup for NCCL: {:.2f})r   r   NumGpuDevicesr   rn   r5   ZRunAllOnGPUr   Zcreate_blobfeedr   r   r   r   Z	Allreducer   r   )rQ   r"   r$   rF   rR   rS   r   r   Znet_timert   Zvanilla_timer   r3   r   test_timings   s*    
"
zNCCLOpsTest.test_timingsN)__name__
__module____qualname__r   stZintegersr   ru   ZbooleansrV   r\   ra   re   rj   Zsampled_fromrr   unittestskipIfosenvirongetrw   r   r   r   r   r    )   sD   






r    )r   r   )r|   Zhypothesis.strategiesZ
strategiesr{   Z
hypothesisr   r   Znumpyr&   r   r~   Zcaffe2.protor   Zcaffe2.pythonr   r   r   r   Z"caffe2.python.hypothesis_test_utilpythonZhypothesis_test_utilrK   r'   seedZInitOpsLibraryr   r   r}   Zhas_cuda_supportZHypothesisTestCaser    r   r   r   r   <module>   s   

