U
    *-eMy                     @   s  U d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
mZmZmZmZmZmZmZmZ d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
l m!Z! d dl"m#Z# d dl$m%Z%m&Z&m'Z'm(Z(m)Z) d dl*m+Z+ d dl,m-Z-m.Z. d dl/m0Z0 d dl1m2Z2m3Z3m4Z4m5Z5 da6ee j7 e8d< ej9j:Z:G dd deZ;eG dd dZ<eG dd dZ=e
e>dddZ?dMej@jAee
df eeeBe
f  eeejCeejDeee' ee' f f  e
dddZEdd  ZFejjGe#dd!d"d#ZHeejGe
f e
e
d$d%d&ZIeee=eJf  eeeeJ ee' f d'd(d)ZKejGee
df ed*d+d,ZLejGee
df ed*d-d.ZMejGee
df eeBe
f eed/d0d1ZNejGee
df eeBe
f eed/d2d3ZOejGee
df eeBe
f eed/d4d5ZPe:jQjReLe:jSjReLe:jTjReMe:jUjReLiZVeej@jAef e8d6< e:jWjReNe:jXjReOe:jXjYeOiZZeej@jAef e8d7< e:j[jRePe:jXjYePe:j\jRePiZ]eej@jAef e8d8< d9dd:ejGeejGe
f e>ee eej^ d;d<d=Z_eeejGe
f eej^e
f d>d?d@Z`ej^ejGeejGe
f ejGdAdBdCZaej^eejjGejj^f ddDdEdFZbejceejGeejG f dGdHdIZddNej^eejC ee< ee e>eej^eeBe<f f dJdKdLZedS )O    N)	dataclass)autoEnum)partial)	AnyCallablecastDictListOptionalSequenceTupleUnion)_get_tracer)OP)
get_logger)
DeviceMeshDTensor)_operator_dispatch)OpSchema)_PartialDTensorSpec	Placement	ReplicateShard)redistribute_local_tensor)make_fx
proxy_slot)TensorMetadata)tree_flattentree_maptree_map_onlytree_unflattenloggerc                   @   s   e Zd Ze Ze ZdS )TrainingPhaseN)__name__
__module____qualname__r   ZFORWARDZBACKWARD r(   r(   c/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/torch/distributed/_spmd/distribute.pyr$   %   s   r$   c                   @   s"   e Zd ZU eed< ee ed< dS )Schemamesh
placementsN)r%   r&   r'   r   __annotations__r
   r   r(   r(   r(   r)   r*   *   s   
r*   c                   @   sP   e Zd ZU dZeed< eed< eed< edddZe	e
jed dd	d
ZdS )DSymInta?  
    DSymInt represents a value retrieved by a SymInt op from a DTensor. DSymInt
    helps View and Factory ops to determine the placement and shape of the
    output tensor, as those operators either do not have an input DTensor or
    the input DTensor is insufficient to determine the output tensor's placement.
    global_valuelocal_valuer+   )returnc                 C   s   | j | jkS N)r0   r/   )selfr(   r(   r)   is_shard=   s    zDSymInt.is_shard)nodedtensorr1   c                 C   s   d}|j tjkr@tt|jd }| ||| ||jdS |j tj	krh| |
 | 
 |jdS |j tjkrtt|jd }| ||| ||jdS td|j  d S )Nr      )r/   r0   r+   zDSymInt does not support )targetatensym_sizer   intargssizeto_localdevice_mesh	sym_numelZnumel
sym_stridestrideNotImplementedError)clsr5   r6   dimr(   r(   r)   	from_node@   s,    
zDSymInt.from_nodeN)r%   r&   r'   __doc__r;   r-   r   boolr4   classmethodfxNoder   rF   r(   r(   r(   r)   r.   0   s   
r.   )objr1   c                 C   s4   t | tsdS d}| jD ]}t |trd} q0q|S )zDcheck if object is 1) DTensor and  2) with any placement of _PartialFT)
isinstancer   r,   r   )rL   Z
is_partialZ	placementr(   r(   r)   _is_partial_dtensor[   s    


rN   .)op
local_argskwargsspecsr1   c                    s<   |d kri } d kri  t t d fdd}| t|||S )Nargr1   c              	      sz    |  \}}}}t || j| j|  tj| ji d}t|t||d}t|t||d}t	| tj
rv|  krvt| ||S | S )N)dtyperequires_gradrB   Zmemory_formatis_quantizedZqparams)tensor_meta)r   rU   rV   rB   torchZcontiguous_formatrW   r   tuplerM   Tensorr   )rT   Ztensor_shaper+   Zcurrent_placementZtarget_placementrX   Zcurrent_specZtarget_specrR   r(   r)   redistributey   s4    	    
z2_dispatch_with_local_tensors.<locals>.redistribute)r   r    )rO   rP   rQ   rR   r]   r(   r\   r)   _dispatch_with_local_tensorsi   s    r^   c                 C   s~   t | \}}t |j\}}i }t|D ]D\}}	t|	tr&|r`|	 || j|	j|| jf||	j< |	j||< q&t	||}
||
fS r2   )
r   args_schema	enumeraterM   r   r=   r+   r,   _local_tensorr"   )r<   target_schemar]   flatten_argsargs_tree_specZflatten_args_schema_rR   irT   unflattened_argsr(   r(   r)   _update_specs_for_redistribute   s    
	


rh   )r5   	op_schemar1   c                 C   s   t | j\}}t |j\}}tttjjf tddd}t	|t	|ksLt
tt||D ]&\}\}}	||rZt|	trZ|	||< qZt||}
t|
D ]\}}| || qd S )NrS   c                 S   s.   t | tjjr$| jtjtjtjfkS t | t	S r2   )
rM   rY   rJ   rK   r8   r9   r:   r@   rA   r;   )rT   r(   r(   r)   is_sym_int_or_int   s    z6_update_node_from_op_schema.<locals>.is_sym_int_or_int)r   r<   r_   r   r;   rY   rJ   rK   rH   lenAssertionErrorr`   ziprM   r"   Z
update_arg)r5   ri   	flat_argsrd   Zflat_args_schemare   rj   rf   rT   Z
arg_schemar<   idxr(   r(   r)   _update_node_from_op_schema   s    	

rp   )node_to_objrT   r1   c                 C   s>   t |tjjr6| | }t r2ttttf |jt	= |S |S d S r2   )
rM   rY   rJ   rK   r   r   r	   r   __dict__r   )rq   rT   rL   r(   r(   r)   
_remap_arg   s    rs   )sizesr+   r1   c                 C   sZ   dd | D }dd t | D p&t g}t||jksRtdt| d|j d||fS )Nc                 S   s    g | ]}t |tr|jn|qS r(   rM   r.   r0   .0sr(   r(   r)   
<listcomp>   s    z)unpack_sizes_and_dims.<locals>.<listcomp>c                 S   s*   g | ]"\}}t |tr| rt|qS r(   )rM   r.   r4   r   )rw   rf   ar(   r(   r)   ry      s   
 z"The number of sharded dimensions (z2) must match number of dimensions in device mesh (z).)r`   r   rk   ndimrl   )rt   r+   local_sizesr,   r(   r(   r)   unpack_sizes_and_dims   s    r}   )r5   r<   r1   c                 C   s   t |dks"td| j d| t|d tsBtd|d  t|d tsbtd|d  t|d |d j\}}| jd |f| _t	t
jj| j}tj||d j||d j|dd	S )
N   zExpect two args but got op z with args r   z*Expect 1st argument to be DTensor but got r7   $Expect 2nd argument as list but got FZlocal_tensorr?   r,   	run_check)rk   rl   r8   rM   r   listr}   r?   r<   r   rY   _ops
OpOverload
from_localra   )r5   r<   r|   r,   rO   r(   r(   r)   binop_sym_int_consumer_rule   s"    "  r   c           
   	   C   s\   |\}}}}}}dd |D }t j||j|jd}	tjt |	| |||||j|j	ddS )Nc                 S   s    g | ]}t |tr|jn|qS r(   ru   rv   r(   r(   r)   ry     s    z7slice_backwad_sym_int_consumer_rule.<locals>.<listcomp>)devicerU   Fr   )
rY   zerosr   rU   r   r   Zslice_scatterr>   r?   r,   )
r5   r<   Zgrad_outputZinput_sizesrE   startendstepr|   Zinput_tensorr(   r(   r)   #slice_backwad_sym_int_consumer_rule  s,           r   )r5   r<   rQ   default_meshr1   c                 C   s   t |d }tdd |D r6td| j d| dt|d tsVtd|d  t|d |\}}|f|dd  | _tt	j
j| j}tj|| j|||d	d
S )Nr   c                 s   s   | ]}t |tV  qd S r2   )rM   r   rw   rz   r(   r(   r)   	<genexpr>#  s     z*factory_with_sizes_rule.<locals>.<genexpr>z4Not expect DTensor argument for factory op, but got z with arguments .r   r7   Fr   )r   anyrl   r8   rM   r   r}   r<   r   rY   r   r   r   r   )r5   r<   rQ   r   rn   r|   r,   rO   r(   r(   r)   factory_with_sizes_rule  s     
r   c                 C   s>   t dd || _ttjj| j}tj|| j||t	 gddS )Nc                 S   s   t | tr| jS | S r2   ru   rz   r(   r(   r)   <lambda>:      z%factory_arange_rule.<locals>.<lambda>Fr   )
r    r<   r   rY   r   r   r8   r   r   r   r5   r<   rQ   r   rO   r(   r(   r)   factory_arange_rule4  s    
r   c                 C   s>   || | _ | _ttjj| j}tj|| j | j|t	 gddS )NFr   )
r<   rQ   r   rY   r   r   r8   r   r   r   r   r(   r(   r)   default_factory_op_ruleD  s    r   VIEW_SYM_INT_CONSUMERSFACTORY_SYM_INT_CONSUMERSFACTORY_OPSF)force_make_fxr   )r5   rq   r   r   r1   c             
   C   s  t    ttt|| j}ttt|| j}tt jj	| j
}tdd t|d D r|tkrt|dks|td| t| | ||| < W 5 Q R  d S |tkr|d k	stdt| | ||||| < W 5 Q R  d S tttjsttd|| | j
tjjkrtjj}tdd |}td	d |}|tkrXt| | ||||| < W 5 Q R  d S t|||tj\}}}	||| < |	d k	st|	jst| | W 5 Q R  d S |	j d k	st|	j d }
t!||
|	j\}}tt"|||d
}t#|dd|}|j$%  |W  5 Q R  S Q R X d S )Nc                 s   s    | ]}t |tr| V  qd S r2   )rM   r.   r4   r   r(   r(   r)   r   |  s     
 z._get_dtensor_dispatch_graph.<locals>.<genexpr>r   zExpect empty kwargs, but got z%Requires default mesh for factory opszYAssuming using local_value from SymInt for %sis mathematically correct. Full args are %s.c                 S   s   t | tr| jS | S r2   ru   r   r(   r(   r)   r     r   z-_get_dtensor_dispatch_graph.<locals>.<lambda>c                 S   s   t | tr| jS | S r2   ru   r   r(   r(   r)   r     r   )rQ   rR   F)Z_allow_non_fake_inputs)&rY   Zno_gradr    r   rs   r<   rQ   r   r   r   r8   r   r   r   rk   rl   r   rM   r#   loggingLoggerwarningr9   viewdefaultZreshaper   r   r   Z_propagatorZneeds_redistributerp   Zschema_suggestionsrh   r^   r   grapheliminate_dead_code)r5   rq   r   r   r<   rQ   Zop_overloadoutri   Zoutput_shardingrb   Zupdated_args_specrg   dispatchgmr(   r(   r)   _get_dtensor_dispatch_graphn  s        
   


  
r   )dtrq   r1   c           	      C   s   t jt jt jddd}| j}t | j}t|||}dd |jjD }dd |jjD }t|dksltt|dks|t| ||d	 < t	j
|| jt gd
d||d < t|d	 |dd}|dk	st|||d	  fS )a   
    Creates a graph for a dummy add function from a partial DTensor.
    This dummy add is used for triggering all_reduce on a Partial DTensor
    during the DTensor expansion of the traced graph.
    Also returns the actual DTensor after resharding.
    )gradzeror1   c                 S   s   | | S r2   r(   )r   r   r(   r(   r)   	dummy_add  s    z)_build_dummy_add_graph.<locals>.dummy_addc                 S   s   g | ]}|j tjkr|qS r(   )rO   r   PLACEHOLDERrw   nr(   r(   r)   ry     s      z*_build_dummy_add_graph.<locals>.<listcomp>c                 S   s   g | ]}|j tjkr|qS r(   )rO   r   CALL_FUNCTIONr   r(   r(   r)   ry     s      r~   r7   r   Fr   T)r   N)rY   r[   ra   Z
zeros_liker   r   nodesrk   rl   r   r   r?   r   r   )	r   rq   r   r   r   Z
traced_addZplaceholdersZcall_functionstraced_dispatchr(   r(   r)   _build_dummy_add_graph  s,    
     r   )r   r5   rq   r1   c                    s  g }d}|j d D ]}t|tjs0|| q|| }t|sL|| qd}tt|}t||\}}	dd |j	j
D }
dd |j	j
D }t|
dkrt|dkst|d |
d  |j	  |	||
d < i  |j	j
D ]}|jtjkr| |< q|jtjkrt|j dkr(t|j d dksDtd|j  d	t|j  | |j d d   ||j d d  | |j d d  < q|jtjkrt| |jt||j | j	|  | j	| fd
d |< W 5 Q R X qq|r| j	| | j	|S |S d S )NFr   Tc                 S   s$   g | ]}|j d ks|j dkr|qS )Z	wait_commZwait_tensornamer   r(   r(   r)   ry     s   
 
z#_convert_output.<locals>.<listcomp>c                 S   s   g | ]}|j d kr|qS )addr   r   r(   r(   r)   ry     s     
 r7   !Expecting single output, but got  c                    s    |  S r2   r(   r   Zvalue_remapr(   r)   r   A  r   z!_convert_output.<locals>.<lambda>)r<   rM   rJ   rK   appendrN   r   r   r   r   r   rk   rl   replace_all_uses_withr   rO   r   r   OUTPUTZGET_ATTRsetattrr8   getattrinserting_before	node_copy
erase_nodeoutput)r   r5   rq   new_argsZhas_partialargumentrL   r   r   Z
result_objwaitr   dtnr(   r   r)   _convert_output  s\    




&
(r   )r   node_replacementsr1   c              
      s  | j jD ]}||krq|| }t|j\}}di  } |j jD ]$}|jtjkr@||  |< |d7 }q@| j | |j jD ]h}|jtjkrq~|jtjkrt	|jdkst
d|j dt	|jd  |jd }t	|dkr|d }	nd }
t|D ]\}}|d krq|jdkst
|jjdks0t
|jjdksBt
|
d ks`|
|jd ks`t
|jd }
|jd |kst
q|
d k	st
|
}	 |	 }|| q~| j | fdd	 |< td
d ||fD r~| |   qq~| j | W 5 Q R X q| j   |   d S )Nr   r7   r   r   Zcall_function	_operatorgetitemc                    s    |  S r2   r(   r   r   r(   r)   r     r   z _rebuild_graph.<locals>.<lambda>c                 s   s.   | ]&}t |jtjjo$|jjjd V  qdS ))zaten::_foreachzaten::_fused_adamN)rM   r8   rY   r   r   Z_schemar   
startswithr   r(   r(   r)   r     s
   
z!_rebuild_graph.<locals>.<genexpr>)r   r   r   r<   rO   r   r   r   r   rk   rl   r`   r8   r&   r%   r   r   allr   r   Z	recompile)r   r   r5   r   rc   re   rf   r   outputsr   sourcer   new_noder(   r   r)   _rebuild_graphI  sZ    






r   )r   r1   c                    sl   i i t jt jd dfdd t| jD ]8t jj fdd t jj fdd q.S )N)arg_nodeconsumerr1   c                    s&   | kr"|| <   |g |  d S r2   )
setdefaultr   )r   r   )last_consumer_to_nodesnode_to_last_consumerr(   r)   _register_final_consumer  s    z=_get_last_consumer_to_nodes.<locals>._register_final_consumerc                    s
    | S r2   r(   )r   r   r5   r(   r)   r     r   z-_get_last_consumer_to_nodes.<locals>.<lambda>c                    s
    | S r2   r(   )Z
kwarg_noder   r(   r)   r     r   )rJ   rK   reversedr   r5   Zmap_argr<   rQ   )r   r(   )r   r   r5   r   r)   _get_last_consumer_to_nodes  s     r   )r   inpsschemasr   _allow_partialr1   c                    s  t dadd tjD }i }i }t| j}i }	t| jjD ]\}
}tdk	sPtt	d|
|j
|j |j
tjkr|
t|k std|
d  dt| d	tj||
  ||
 j||
 jd
d||< nt|jtjjrtt||jd  }t||||< nt|jtjjr8t|||d}|dk	r|||< n|j
tjkr|sXt| ||}|jd D ]<}t|t j!rb|| }t|trbt"|j#|j|	|j$< qbnB|j
tj%krt&t't(||j}t&t't(||j)}t*t+dd |t,|-   |j|krt dkrt. fdd D s.tdt/tdd |}t/tdd |}t/tdd |}t/tdd |}||_||_)t|j|||j|| d jd||< n0t dkstd|j d|j||||< nt0d|j
 ||kr:|| D ]}||= qq:t1| | | |	fS )zc
    Returns:
        - transformed graph module
        - map from output name to DTensorSpec
    Zspmd_expc                 S   s   h | ]}t t|qS r(   )r   operator)rw   r   r(   r(   r)   	<setcomp>  s     z*_convert_to_distributed.<locals>.<setcomp>Nznode%s: op=%s target=%szgot more placeholder nodes (r7   z) than inputs ()Fr   r   )r   c                 S   s
   t | tS r2   )rM   r.   r   r(   r(   r)   r     r   z)_convert_to_distributed.<locals>.<lambda>c                 3   s   | ]} d  j |j kV  qdS )r   N)r+   )rw   dZdsymintsr(   r)   r     s    z*_convert_to_distributed.<locals>.<genexpr>z&all DSymInts must have the same mesh. c                 S   s   | j S r2   r0   r   r(   r(   r)   r   	  r   c                 S   s   | j S r2   r   r   r(   r(   r)   r   
  r   c                 S   s   | j S r2   r/   r   r(   r(   r)   r     r   c                 S   s   | j S r2   r   r   r(   r(   r)   r     r   )r0   r/   r+   zBSPMD expansion does not support SymInt in non-operator nodes, got r   zUnrecognized node.op type )2r   r#   r   __all__r   r   r`   r   rl   inforO   r8   r   r   rk   r   r   cloner+   r,   rM   rY   r   ZOpOverloadPacketr   r<   r.   rF   r   r   r   r   rJ   rK   r*   r?   r   r   r    r   rs   rQ   r   filterrZ   valuesr   r!   
ValueErrorr   )r   r   r   r   r   	operatorsrq   r   r   Zoutput_schemasrf   r5   r6   replacementZinp_argrL   r<   rQ   rP   Zlocal_kwargsZglobal_argsZglobal_kwargsr   r(   r   r)   _convert_to_distributed  s    

  
 


r   )NN)NF)fr   r   dataclassesr   enumr   r   	functoolsr   typingr   r   r   r	   r
   r   r   r   r   rY   Z(torch.distributed._spmd.experimental_opsZtorch.fxrJ   Z#torch.distributed._spmd.comm_tensorr   Z#torch.distributed._spmd.graph_utilsr   Z!torch.distributed._spmd.log_utilsr   Ztorch.distributed._tensorr   r   Z"torch.distributed._tensor.dispatchr   Z#torch.distributed._tensor.op_schemar   Z)torch.distributed._tensor.placement_typesr   r   r   r   r   Z&torch.distributed._tensor.redistributer   Z"torch.fx.experimental.proxy_tensorr   r   Ztorch.fx.passes.shape_propr   Ztorch.utils._pytreer   r    r!   r"   r#   r   r-   Zopsr9   r$   r*   r.   rH   rN   r   r   strr[   Sizer^   rh   rK   rp   rs   r;   r}   r   r   r   r   r   Z_unsafe_viewr   expandZslice_backwardr   r   fullZaranger   r   Zscalar_tensorr   r   ZGraphModuler   r   r   r   ZGraphr   r   r(   r(   r(   r)   <module>   s   ,*  
/   






       
   m &KW  