U
    *Ç-eÝ  ã                   @   s´   U d dl Z d dlZddlmZ d dlmZ d dlmZ d dlm	Z	m
Z
 G dd„ deƒZG d	d
„ d
eƒZG dd„ deƒZG dd„ deƒZdae
e	e
ej   ed< ejdœdd„ZdS )é    Né   )Úcomm)ÚFunction©Ú_get_device_index)ÚListÚOptionalc                   @   s$   e Zd Zedd„ ƒZedd„ ƒZdS )Ú	Broadcastc                 G   s¾   t dd„ |D ƒƒstdƒ‚dd„ |D ƒ}|| _t|ƒdkr@tƒ S t|ƒ| _|d  ¡ | _t 	|| j¡}g }t
| jdd … ƒD ]$\}}|s||D ]}| || ¡ qŒq|| j|Ž  tdd„ |D ƒƒS )	Nc                 s   s   | ]}|j jd kV  qdS ©ÚcpuN©ÚdeviceÚtype©Ú.0Úi© r   ú]/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/torch/nn/parallel/_functions.pyÚ	<genexpr>   s     z$Broadcast.forward.<locals>.<genexpr>z2Broadcast function not implemented for CPU tensorsc                 S   s   g | ]}t |d ƒ‘qS ©Tr   ©r   Úxr   r   r   Ú
<listcomp>   s     z%Broadcast.forward.<locals>.<listcomp>r   r   c                 S   s   g | ]}|D ]}|‘qqS r   r   )r   ZtensorsÚtr   r   r   r      s       )ÚallÚAssertionErrorÚtarget_gpusÚlenÚtupleÚ
num_inputsÚ
get_deviceÚinput_devicer   Zbroadcast_coalescedÚ	enumerateZneeds_input_gradÚappendZmark_non_differentiable)Úctxr   ÚinputsÚoutputsZnon_differentiablesÚidxZinput_requires_gradÚoutputr   r   r   Úforward   s"    ÿ

zBroadcast.forwardc                 G   s   dt j| j| jf|žŽ  S )N©N)ÚReduceAddCoalescedÚapplyr!   r   ©r$   Zgrad_outputsr   r   r   Úbackward    s    zBroadcast.backwardN©Ú__name__Ú
__module__Ú__qualname__Ústaticmethodr)   r.   r   r   r   r   r	   
   s   
r	   c                   @   s$   e Zd Zedd„ ƒZedd„ ƒZdS )r+   c                    sL   ‡ fdd„t dtˆ ƒˆƒD ƒ| _‡ ‡fdd„t dtˆ ƒˆƒD ƒ}t ||¡S )Nc                    s   g | ]}ˆ |   ¡ ‘qS r   ©r    r   )Úgradsr   r   r   )   s     z.ReduceAddCoalesced.forward.<locals>.<listcomp>r   c                    s   g | ]}ˆ ||ˆ … ‘qS r   r   r   ©r5   r   r   r   r   +   s   ÿ)Úranger   r   r   Zreduce_add_coalesced)r$   Údestinationr   r5   Zgrads_r   r6   r   r)   '   s
     ÿzReduceAddCoalesced.forwardc                 G   s   dt j| jf|žŽ  S )N©NN)r	   r,   r   r-   r   r   r   r.   /   s    zReduceAddCoalesced.backwardNr/   r   r   r   r   r+   %   s   
r+   c                   @   s$   e Zd Zedd„ ƒZedd„ ƒZdS )ÚGatherc                    sÂ   t dd„ |D ƒƒstdƒ‚|dkr*dˆ _nt|dƒ}|ˆ _|ˆ _tdd„ |D ƒƒˆ _t dd„ |D ƒƒr’|dkr’td	d„ |D ƒƒ}t d
¡ dˆ _	ndˆ _	t‡ fdd„|D ƒƒˆ _
t |ˆ jˆ j¡S )Nc                 s   s   | ]}|j jd kV  qdS r
   r   r   r   r   r   r   8   s     z!Gather.forward.<locals>.<genexpr>z/Gather function not implemented for CPU tensorsr   Tc                 s   s   | ]}|  ¡ V  qd S r*   r4   r   r   r   r   r   A   s     c                 s   s   | ]}|  ¡ d kV  qdS ©r   N)Údim©r   r   r   r   r   r   B   s     r   c                 s   s   | ]}|  d ¡V  qdS )r   N)Úviewr=   r   r   r   r   C   s     zvWas asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.Fc                 3   s   | ]}|  ˆ j¡V  qd S r*   )Úsizer<   r   ©r$   r   r   r   J   s     )r   r   Útarget_devicer   r<   r   Ú
input_gpusÚwarningsÚwarnÚunsqueezed_scalarÚinput_sizesr   Úgather)r$   rA   r<   r%   r   r@   r   r)   6   s     ÿ

zGather.forwardc                 C   s6   t  | j| j| j|¡}| jr.tdd„ |D ƒƒ}d| S )Nc                 s   s   | ]}|d  V  qdS r;   r   )r   Úgr   r   r   r   Q   s     z"Gather.backward.<locals>.<genexpr>r9   )ÚScatterr,   rB   rF   r<   rE   r   )r$   Úgrad_outputZscattered_gradsr   r   r   r.   M   s    zGather.backwardNr/   r   r   r   r   r:   4   s   
r:   c                   @   s$   e Zd Zedd„ ƒZedd„ ƒZdS )rI   c           
   
   C   sÄ   dd„ |D ƒ}|| _ |jjdkr(| ¡ nd| _d }tj ¡ rT| jdkrTdd„ |D ƒ}t 	|||| j |¡}|d k	rÀt
|ƒD ]F\}}tj || ¡( tj ¡ }	|	 || ¡ | |	¡ W 5 Q R X qx|S )Nc                 S   s   g | ]}t |d ƒ‘qS r   r   r   r   r   r   r   Y   s     z#Scatter.forward.<locals>.<listcomp>r   éÿÿÿÿc                 S   s   g | ]}t t d |¡ƒ‘qS )Úcuda)Ú_get_streamÚtorchr   )r   r   r   r   r   r   _   s     )r<   r   r   r    r!   rN   rL   Zis_availabler   Zscatterr"   Zcurrent_streamZwait_streamZrecord_stream)
r$   r   Zchunk_sizesr<   ÚinputÚstreamsr&   r   r(   Zmain_streamr   r   r   r)   W   s    
zScatter.forwardc                 G   s   d d d t j| j| jf|žŽ fS r*   )r:   r,   r!   r<   )r$   rJ   r   r   r   r.   j   s    zScatter.backwardNr/   r   r   r   r   rI   U   s   
rI   Ú_streams)r   c                 C   sh   | j dkrdS tt| j dƒ}|dkr(dS tdkr>dg| ¡  at| j dkr^| | j¡t| j< t| j S )zBGets a background stream for copying between CPU and target devicer   N)r   ÚgetattrrN   rQ   Zdevice_countÚindexÚStream)r   Z
device_modr   r   r   rM   r   s    
rM   )rC   rN   Ú r   Ztorch.autogradr   Ztorch._utilsr   Útypingr   r   r	   r+   r:   rI   rQ   rT   Ú__annotations__r   rM   r   r   r   r   Ú<module>   s    !