import torch
from torch.fx import Node
from torch.fx._compatibility import compatibility
from torch._subclasses.fake_tensor import FakeTensorMode, FakeTensor
from torch.utils._pytree import tree_map, tree_flatten, tree_map_only
from torch.multiprocessing.reductions import StorageWeakRef

import _operator
import itertools
from enum import Enum
from typing import Set, Dict
from collections import defaultdict

__all__ = ['reinplace']


class _ViewType(Enum):
    NonView = 0
    SingleOutputView = 1
    MultiOutputView = 2


def _is_view_op(tgt):
    if tgt is not None and isinstance(tgt, torch._ops.OpOverload):
        schema = tgt._schema
        if len(schema.arguments) > 0:
            first_arg = schema.arguments[0]
            # The op is a view if its first argument aliases the output
            # without being written to.
            return first_arg.alias_info is not None and not first_arg.alias_info.is_write


def _get_view_type(tgt) -> _ViewType:
    if tgt is not None and isinstance(tgt, torch._ops.OpOverload):
        schema = tgt._schema
        if len(schema.arguments) > 0:
            first_arg = schema.arguments[0]
            # check if op is a view
            if first_arg.alias_info is not None and not first_arg.alias_info.is_write:
                # check if op is a multi-output view: a '*' in the alias annotation's
                # after_set means the input aliases a list of outputs.
                if '*' in first_arg.alias_info.after_set:
                    return _ViewType.MultiOutputView
                else:
                    return _ViewType.SingleOutputView
    return _ViewType.NonView

@compatibility(is_backward_compatible=False)
class _FunctionalizationMetadataProp(torch.fx.Interpreter):

    def run_node(self, node: Node):
        self.node_counter += 1
        result = super().run_node(node)
        node.meta['fake_result'] = result
        node.meta['node_idx'] = self.node_counter

        # (1) Update metadata with the list of nodes that are used by this node.
        # copy_() doesn't read from its first argument; it writes to it, overwriting
        # previous data, so we don't treat that argument as "being used as an input".
        node_args = node.args
        if node.target is torch.ops.aten.copy_.default:
            node_args = node_args[1:]

        # (2) Update metadata to track aliasing information about view tensor nodes.
        if node.op == 'call_function':
            view_type = _get_view_type(node.target)
            if view_type == _ViewType.SingleOutputView:
                assert isinstance(node.args[0], Node)
                node.meta['view_of'] = node.args[0]
            elif view_type == _ViewType.MultiOutputView:
                self.multi_output_view_nodes[node] = node.args[0]
            elif node.target is _operator.getitem:
                # For multi-output views, the aliasing nodes are the getitem calls
                # on the output list, not the (e.g. split) call itself.
                list_arg = node.args[0]
                maybe_base_of_view = self.multi_output_view_nodes.get(list_arg, None)
                if maybe_base_of_view is not None:
                    assert isinstance(maybe_base_of_view, Node)
                    node.meta['view_of'] = maybe_base_of_view

        if 'view_of' in node.meta:
            # We're linking the current node with its first argument as views.
            # Sanity-check that their (fake) storages agree.
            assert isinstance(node.meta['fake_result'], FakeTensor)
            assert isinstance(node.meta['view_of'].meta['fake_result'], FakeTensor)
            view_storage = StorageWeakRef(node.meta['fake_result']._typed_storage())
            base_storage = StorageWeakRef(node.meta['view_of'].meta['fake_result']._typed_storage())
            assert view_storage == base_storage
        return result

    def propagate(self, *args):
        self.multi_output_view_nodes = {}
        self.node_counter = -1

        with FakeTensorMode() as mode:
            fake_args = [mode.from_tensor(a) for a in args]
            return super().run(*fake_args)


def _schemas_match(functional_schema, inplace_schema):
    # An inplace schema "matches" its functional counterpart if it has the same base
    # name plus a trailing underscore (e.g. add -> add_) and the same argument types.
    names_match = inplace_schema.name.endswith("_") and inplace_schema.name[:-1] == functional_schema.name
    arg_types_match = len(functional_schema.arguments) == len(inplace_schema.arguments) and all(
        a1.type == a2.type for a1, a2 in zip(functional_schema.arguments, inplace_schema.arguments))
    # For the inplace op, its first argument should be mutable...
    assert inplace_schema.arguments[0].alias_info is not None and inplace_schema.arguments[0].alias_info.is_write
    # ...and its remaining arguments shouldn't be.
    assert all(a.alias_info is None for a in inplace_schema.arguments[1:])
    return names_match and arg_types_match


def _maybe_get_inplace_op(op):
    # Given a functional OpOverload (e.g. torch.ops.aten.add.Tensor), try to find the
    # corresponding inplace overload with a matching schema (e.g. torch.ops.aten.add_.Tensor).
    if not isinstance(op, torch._ops.OpOverload):
        return None
    # View ops are never re-inplaced, even if an inplace variant exists.
    if _is_view_op(op):
        return None
    op_namespace = op.__module__.split(".")[-1]
    op_base_name = op.overloadpacket.__name__
    maybe_namespace_module = getattr(torch.ops, op_namespace, None)
    maybe_inplace_op = None if maybe_namespace_module is None else getattr(
        maybe_namespace_module, f'{op_base_name}_', None)
    if maybe_inplace_op is None:
        return None

    inplace_overloads = [
        getattr(maybe_inplace_op, overload_name) for overload_name in maybe_inplace_op.overloads()
    ]
    inplace_overloads_with_matching_schemas = [
        f for f in inplace_overloads if _schemas_match(op._schema, f._schema)
    ]
    if len(inplace_overloads_with_matching_schemas) == 0:
        return None
    assert len(inplace_overloads_with_matching_schemas) == 1
    inplace_op = inplace_overloads_with_matching_schemas[0]
    return inplace_op


# Map from each {view}_scatter op to the view op that it "inverts",
# e.g. diagonal_scatter(a, b, ...) writes b into the region that diagonal(a, ...) views.
_VIEW_INVERSE_MAP = {
    torch.ops.aten.diagonal_scatter.default: torch.ops.aten.diagonal.default,
    torch.ops.aten.select_scatter.default: torch.ops.aten.select.int,
    torch.ops.aten.slice_scatter.default: torch.ops.aten.slice.Tensor,
    torch.ops.aten.as_strided_scatter.default: torch.ops.aten.as_strided.default,
}


# Given a set of (aliased) tensor nodes and the index of the current op in the graph,
# return every node that uses any of those tensors later in the graph.
def _get_all_later_node_usages(tensor_aliases: Set[Node], op_index: int):
    def _add_if_tensor(x, set_):
        if isinstance(x, FakeTensor):
            set_.add(StorageWeakRef(x._typed_storage()))

    nodes_used_after = set()
    for t in tensor_aliases:
        usage_nodes = t.users
        for n in usage_nodes:
            # We only care about usages after the current node.
            if 'node_idx' not in n.meta or n.meta['node_idx'] <= op_index:
                continue
            # We also don't care about later view ops that are themselves part of the
            # alias set; only their downstream (non-view) consumers matter, and those
            # are covered because the view node is also iterated as an alias.
            if n in tensor_aliases:
                if isinstance(n.target, torch._ops.OpOverload) or n.target is _operator.getitem:
                    continue
            nodes_used_after.add(n)
    return nodes_used_after


# Given the set of later usages of the current self argument's aliases, return the
# subset that are "view inverse" (*_scatter) nodes, which can safely be removed from
# the graph if we re-inplace the current op.
def _get_view_inverse_node_usages(later_node_usages: Set[Node], self_aliases: Set[Node]) -> Set[Node]:
    def matching_view_metadata(a, b):
        return a.size() == b.size() and \
            a.stride() == b.stride() and \
            a.storage_offset() == b.storage_offset()

    view_inverse_nodes = set()
    # Check the later usages in program order.
    for n in sorted(later_node_usages, key=lambda x: x.meta['node_idx']):
        # Is this node a view-inverse op?
        if n.target not in _VIEW_INVERSE_MAP:
            continue
        base = n.args[0]
        mutated_view = n.args[1]
        assert isinstance(base, Node)
        assert isinstance(base.meta['fake_result'], FakeTensor)
        assert isinstance(mutated_view, Node)
        assert isinstance(mutated_view.meta['fake_result'], FakeTensor)
        original_view = _VIEW_INVERSE_MAP[n.target]
        for self_alias in self_aliases:
            # We're looking for some alias of the self arg that was created by the
            # view op that this scatter op inverts.
            if 'view_of' not in self_alias.meta:
                continue
            self_alias_base = self_alias.meta['view_of']
            try:
                # Replay the view op with the scatter op's args; if this throws, the
                # scatter op isn't a valid inverse of the alias we're looking at.
                view_replay_metadata = original_view(
                    self_alias_base.meta['fake_result'], *n.args[2:], **n.kwargs)
                expected_metadata = self_alias.meta['fake_result']
                # If both the base and the replayed view pass the metadata checks,
                # then this *_scatter op is a valid inverse.
                if matching_view_metadata(self_alias_base.meta['fake_result'], base.meta['fake_result']) and \
                        matching_view_metadata(view_replay_metadata, expected_metadata):
                    view_inverse_nodes.add(n)
            except Exception:
                continue

    return view_inverse_nodes


@compatibility(is_backward_compatible=True)
def reinplace(gm, *sample_args):
    """
qp|D ]}'| j2|' qL| 3  | S )a   
    Given an fx.GraphModule, modifies it to perform "reinplacing",
    mutating the nodes of the graph.
    We look for out-of-place op call sites like `b = a.add(...)`,
    and convert them to be inplace (`b = a.add_(...)`),
    as long as the input to the current operator ("a") isn't re-used
    anywhere later in the graph.

    This pass currently expects to operate on a **functional, ATen** graph.
    This can be obtained by running `make_fx(functionalize(f))`.

    Sample inputs are needed to determine aliasing relationships of the inputs.
    In general, we can't reinplace node `b = a.add(...)` if "a" aliases any of the
    inputs to the program.

    Given a node "b = foo(a, args...)", the algorithm for re-inplacing is as follows:

    (1) Perform some initial checks on the metadata of "a" and "args..."
        that can disqualify them from being reinplaced.

      (1a) Check that the self argument we're attempting to reinplace
           has acceptable dtype/size metadata to reinplace with.

           For example, if we have:
             a = torch.ones(1)
             b = torch.ones(10)
             out = torch.add(a, b)
           We can't turn that into
             a.add_(b)
           Because that would require resizing "a".

           Similarly, we can't convert torch.ge(a, b) into a.ge_(b),
           because that would require changing a's dtype (from e.g. float32 to bool).
           Note that in this specific example, we could technically do better..

           If we see the pattern:
             a_1 = a.ge(b)
             a_2 = aten._to_copy(a_1, a.dtype)
           Then this should be valid to completely re-inplace
           (this is exactly what functionalization will emit when it sees a.ge_(b)).

           This optimization is only really important for user programs
           that directly use inplace comparison ops though.

           We also cannot re-inplace on tensors that have overlapping memory,
           e.g. torch.ones(1).expand(4, 4).add_(1)

      (1b) Check if "a" is an alias of any of the program inputs.

          If it is, skip and move to the next node.
          Inplace'ing an op that would cause it to mutate a program input is not sound,
          because that would be a side effect visible to the user.

          NOTE: there's a future optimization that we should make:
          if "a" is a (alias of a)  program input, but later in the program
          there is a node that looks like "a.copy_(...)",
          Then re-inplacing is ok to do - we are temporarily re-using a's buffer,
          which will later be overwritten by the copy_() call.

          This will be an important optimization to have for programs that mutate
          their inputs. It currently isn't implemented though.
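
          As a concrete sketch of the basic (1b) rule: if "x" is a program input
          and the graph contains
            b = torch.ops.aten.add(x, 1)
          we cannot rewrite it to
            torch.ops.aten.add_(x, 1)
          because the caller's "x" would be visibly mutated.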

      (1c) Check if "a" and "args..." alias

          For example, re-inplacing to create code like the below
          isn't guaranteed to be sound:

            aten.mul_(a, a)

    (2) Check that "a" and all of its outstanding aliases are not used anywhere
        later in the graph. If this is the case, then it's safe to re-inplace
        to "b = foo_(a)".

        There are a few caveats to this, explained in more detail below:
        (a) If "a" is used later as an argument to a view op, that is okay.
            It's only a problem if "a" (or that view) is later passed
            into a normal operator, or if it is returned as the program output.
        (b) If "a" is a repeat argument in `foo()`, then don't reinplace.
            Most ATen kernels don't make any guarantees that this is sound,
            e.g. if you do aten.mul_(a, a).
            So we'll just ban re-inplacing in this case.
        (c) If "a" is used as an input into a view "inverse" / "scatter"
            operator, it is potentially fine to re-inplace
            (and remove that scatter operator from the graph).
            See below for a more detailed example.

        NOTE: there is an optimization in this step that is crucial
        to fully recovering performance from functionalization.

        Given this program:
        def f(x):
            a = torch.ops.aten.add(x, x)
            b = torch.ops.aten.diagonal(a)
            torch.ops.aten.fill_(b, 0)
            return a

        Functionalization will emit the following:
        def f(x):
            a = torch.ops.aten.add(x, x)
            b = torch.ops.aten.diagonal(a, 0, 1)
            b_updated = torch.ops.aten.fill(b, 0)
            a_updated = torch.ops.aten.diagonal_scatter(a, b_updated, 0, 1)
            return a_updated

        Ordinarily, we would not be able to reinplace the fill,
        because "b" aliases with "a" which is used by the diagonal_scatter call.

        "re-inplacing" is on the hook for figuring out that it is ok to
        completely remove the expensive diagonal_scatter call, if we re-inplace the fill().

        So, for every `alias in alias_set(a)`, instead of checking
        that "alias" is not used anywhere later in the graph,
        we check that
            EITHER:
          (a) alias is not used anywhere later in the graph
            OR:
          (b) alias is used exactly once later on in the graph,
              in the following op:

                out = foo_scatter(alias, x, args...)

              where the following must hold:
                (i) "foo_scatter" is the "inverse" operator for foo.
                    This only applies to "foo" ops that are view operators,
                    which view into a subset of the original tensor's memory.
                    In practice, there are ~4 operators where this applies:
                      diagonal -> diagonal_scatter
                      slice -> slice_scatter
                      select -> select_scatter
                      as_strided -> as_strided_scatter
                (ii) "args..." are the same between the foo() and foo_scatter() calls.
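
              For example (illustrative), in the diagonal/fill program above,
                b = torch.ops.aten.diagonal(a, 0, 1)
                ...
                a_updated = torch.ops.aten.diagonal_scatter(a, b_updated, 0, 1)
              satisfies both (i) and (ii); it would not qualify if the
              diagonal_scatter call used different dim/offset arguments than the
              original diagonal call.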

    (3) Perform the actual re-inplacing on foo!

      (3b) is the common case, but special care is needed for {view}_scatter (3a)

      (3a) {view}_scatter ops.

        Consider this program:
          a = torch.zeros(2, 2)
          b = torch.ones(2)
          a[0] = b

        Post functionalization, that will look like:
          a = torch.zeros(2, 2)
          b = torch.ones(2)
          a_updated = torch.select_scatter(a, b, 0, 0)

        In this case though, there is no "functional" op to re-inplace!
        Instead, we'd like to directly remove the select_scatter call.
        We already know from (3) that this is valid,
        because "a" has no later usages in the graph.

        We perform the re-inplacing on the {view}_scatter op like so
        Before:
          a_updated = torch.select_scatter(a, b, args...)
        After:
          a_slice = a.select(args...)
          a_slice.copy_(b)

      (3b) Otherwise, replace the functional op with its inplace variant.
        Before:
          b = foo(a, args...)
        After:
          a.foo_(args...)

    (4) Finally, after converting either:
          Before:
            b = foo(a)
          After:
            foo_(a)
        or
          Before:
            b = {slice}_scatter(a, mutated_slice, args...)
          After:
            slice = {slice}(a, args...)
            slice.copy_(mutated_slice)

        We now need to find all later nodes that use "b" as an argument
        and update them to take in "a" instead.

        Note that for the majority of inplace ops, this isn't actually necessary
        (because most inplace ops return "self" as their output).
        This isn't generally true for all mutable ops though, which is why
        we need to actually replace all of the arguments.

        We also need to update our metadata Dict[StorageWeakRef, Set[Node]],
        which maps a given tensor storage to the set of all nodes that take in that storage
        as an input.
        Specifically, re-inplacing `b = foo(a)` causes "a" and "b"'s sets to get fused
        together.
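
        For example (illustrative):
          Before:
            b = foo(a)
            c = bar(b)
          After:
            foo_(a)
            c = bar(a)
        and the storage sets for "a" and "b" are merged, so later queries treat
        users of either tensor as users of the same buffer.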

    (5) Any "view_inverse/scatter" nodes that were identified as "it's ok to ignore them"
        during step (3) get manually deleted from the graph.
        Their outputs are no longer used, so technically standard DCE would be able
        to do this, but we can no longer run FX's DCE pass now that we have mutable
        ops in the graph.
    """
    _FunctionalizationMetadataProp(gm).propagate(*sample_args)

    # We need to know which nodes correspond to inputs (or their aliases)
    # so we know not to re-inplace them.
    input_storages = {
        StorageWeakRef(node.meta['fake_result']._typed_storage())
        for node in gm.graph.nodes if node.op == 'placeholder'}

    # We also need to know, for a given node, all of the nodes that alias its storage.
    storage_to_nodes: Dict[StorageWeakRef, Set[Node]] = defaultdict(set)
    for n in gm.graph.nodes:
        if 'fake_result' in n.meta:
            # Tree-mapping because some ops can return lists of tensors.
            def _add_to_map(x):
                if isinstance(x, FakeTensor):
                    storage_to_nodes[StorageWeakRef(x._typed_storage())].add(n)
            tree_map(_add_to_map, n.meta['fake_result'])

    # inplace-ify functional ops, subject to the constraints written in the docstring.
    all_later_view_inverse_nodes_to_delete = set()
    for idx, node in enumerate(gm.graph.nodes):
        if node.op != 'call_function':
            continue
        # Today the pass only acts on OpOverloads whose first argument is a Tensor.
        if not isinstance(node.target, torch._ops.OpOverload):
            continue
        if len(node.target._schema.arguments) < 1:
            continue
        if type(node.target._schema.arguments[0].type) != torch.TensorType:
            continue

        # Step 1a: check that the self argument has metadata (numel/dtype) compatible
        # with the output, and no internal memory overlap.
        self_arg = node.args[0]
        self_flattened, _ = tree_flatten(self_arg.meta['fake_result'])
        node_flattened, _ = tree_flatten(node.meta['fake_result'])
        self_has_wrong_metadata = False
        if len(self_flattened) == len(node_flattened):
            for self_meta, node_meta in zip(self_flattened, node_flattened):
                if self_meta.numel() != node_meta.numel():
                    self_has_wrong_metadata = True
                if self_meta.dtype != node_meta.dtype:
                    self_has_wrong_metadata = True
                if torch._debug_has_internal_overlap(self_meta) == 1:
                    self_has_wrong_metadata = True
        # The functional resize op is special-cased: it is treated as re-inplaceable
        # even though its metadata changes.
        if self_has_wrong_metadata and node.target is not torch.ops.aten.resize.default:
            continue

        # Step 1b: don't re-inplace if the self argument aliases a program input.
        self_arg_storage = StorageWeakRef(self_arg.meta['fake_result']._typed_storage())
        if self_arg_storage in input_storages:
            continue
        # Step 1c: don't re-inplace if the self argument is a repeat argument,
        # e.g. aten.mul(a, a).
        if len([x for x in node.args if x is self_arg]) > 1:
            continue

        self_aliases = storage_to_nodes[self_arg_storage]

        # Step 2: find all later usages of the self argument (or any of its aliases),
        # and figure out which of them are removable {view}_scatter ops.
        later_node_usages = _get_all_later_node_usages(self_aliases, node.meta['node_idx'])
        later_view_inverse_node_usages = _get_view_inverse_node_usages(later_node_usages, self_aliases)
        can_reinplace = len(later_node_usages - later_view_inverse_node_usages) == 0
        if not can_reinplace:
            continue

        if node.target in _VIEW_INVERSE_MAP and node not in all_later_view_inverse_nodes_to_delete:
            # Step 3a: {view}_scatter ops are not inplace-ified; instead they are
            # replaced by "view the base, then copy_ the mutated slice into it".
            view_op = _VIEW_INVERSE_MAP[node.target]
            with gm.graph.inserting_before(node):
                mutated_slice_node = node.args[1]
                remaining_slice_args = node.args[2:]
                slice_node = gm.graph.create_node(
                    'call_function', view_op, (self_arg,) + tuple(remaining_slice_args), node.kwargs)
                copy_node = gm.graph.create_node(
                    'call_function', torch.ops.aten.copy_.default, (slice_node, mutated_slice_node,), {})
            # The scatter node itself is now dead and will be erased at the end.
            all_later_view_inverse_nodes_to_delete.add(node)
        else:
            # Step 3b: replace the functional op with its inplace variant, if it has one.
            maybe_inplace_op = _maybe_get_inplace_op(node.target)
            if maybe_inplace_op is None:
                continue
            node.target = maybe_inplace_op

        # The output of the node now aliases its self argument, so merge their alias sets.
        curr_node_storage = StorageWeakRef(node.meta['fake_result']._typed_storage())
        storage_to_nodes[self_arg_storage].update(storage_to_nodes[curr_node_storage])
        storage_to_nodes[curr_node_storage].update(storage_to_nodes[self_arg_storage])

        # Remember any {view}_scatter usages that we're now allowed to delete.
        all_later_view_inverse_nodes_to_delete.update(later_view_inverse_node_usages)

        # Step 4: replace later usages of the old (functional) outputs with the self argument.
        for old in itertools.chain([node], later_view_inverse_node_usages):
            new = old.args[0]
            nodes_to_update = [n for n in old.users if n.meta['node_idx'] > node.meta['node_idx']]
            for node_to_update in nodes_to_update:
                def replace_arg(a):
                    if a == old:
                        return new
                    return a
                node_to_update.args = tree_map_only(Node, replace_arg, node_to_update.args)
                node_to_update.kwargs = tree_map_only(Node, replace_arg, node_to_update.kwargs)

                # Also update the storage_to_nodes mapping.
                old_flattened_res, _ = tree_flatten(old.meta['fake_result'])
                node_flattened_res, _ = tree_flatten(node_to_update.meta['fake_result'])
                old_res_storage = {
                    StorageWeakRef(x._typed_storage()) for x in old_flattened_res if isinstance(x, FakeTensor)}
                node_res_storage = {
                    StorageWeakRef(x._typed_storage()) for x in node_flattened_res if isinstance(x, FakeTensor)}

                # This case corresponds to a view op whose input we just swapped,
                # e.g. rewriting `x = view(old)` into `x = view(new)`.
                if len(old_res_storage) == 1 and len(node_res_storage) == 1 and old_res_storage == node_res_storage:
                    new_flattened_res, _ = tree_flatten(new.meta['fake_result'])
                    new_res_storage = {
                        StorageWeakRef(x._typed_storage()) for x in new_flattened_res if isinstance(x, FakeTensor)}
                    assert len(new_res_storage) == 1
                    (old_ref,) = old_res_storage
                    (new_ref,) = new_res_storage
                    (node_ref,) = node_res_storage
                    storage_to_nodes[node_ref].update(storage_to_nodes[new_ref])
                    storage_to_nodes[new_ref].update(storage_to_nodes[node_ref])

    # Step 5: erase the {view}_scatter nodes that were identified as removable.
    # This can't happen inside the main loop, since the graph is still being mutated there.
    for to_delete in all_later_view_inverse_nodes_to_delete:
        gm.graph.erase_node(to_delete)

    gm.recompile()
    return gm