U
    dA                     @   s  d dl Zd dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
Z
d dlZd dlmZmZ d dlm  mZ edZeej edddd	gZd[ddZd\ddZdd Zd]ddZdd Zdd Zdd Zdd Z dd Z!dd Z"d d! Z#d"d# Z$d$d% Z%d&d' Z&d^d(d)Z'd*d+ Z(d,d- Z)d.d/ Z*d0d1 Z+d_d2d3Z,d4d5 Z-d`d6d7Z.dad8d9Z/d:d; Z0d<d= Z1d>d? Z2ed@dAdBdCgZ3dDdE Z4dFdG Z5G dHdI dIe
j6Z7dJdK Z8e&de7j9fdLdMZ:dNdO Z;dPdQ Z<edRdSdTgZ=dUdV Z>dWdX Z?dYdZ Z@dS )b    N)	workspacecore)
caffe2_pb2)	viewitems
viewvaluesZmemonger	LiveRangedefinedusedsizeFc              
      s  fdd  fdd}t d dkr>ds>d7 t|  }g }	t|  j}
|  jD ]0}|j	D ]$}|d |j
krr||
krr|	| qrqht|	d	d
 }	g }t|jD ]\}}||r|| qt }|  jD ]>}t|j
t|j	 D ]$} |s|r||	kr|| qqt }t| dd |D |tdd |D d|d	krlt n||d	kr|i n|}t dt |  t }|| t|  |stdt|  |std|S )a$  
    Implements similar optimization as Torch's shareGradInput():
    for the gradients that are passed between layers, share blobs between
    operators when possible. This yields significant memory savings with
    deep networks.

    Returns an optimized protobuf (assign to net._net)
    c                    s2   t | }|do0| s*|d  o0|kS )NZ_grad_)strendswith
startswith)bname)	namescopeparam_grads :/tmp/pip-unpacked-wheel-ua33x9lu/caffe2/python/memonger.pyis_grad_blob)   s    z&share_grad_blobs.<locals>.is_grad_blobc                    s,   t | jt | j D ]} |r dS qdS )NTF)listinputoutput)opr   )r   r   r   
is_grad_op0   s    z$share_grad_blobs.<locals>.is_grad_opz4NOTE: Executing memonger to optimize gradient memory /_wNc                 S   s   g | ]}t |d qS utf-8r   encode.0sr   r   r   
<listcomp>X   s     z$share_grad_blobs.<locals>.<listcomp>c                 s   s   | ]}t |d V  qdS r    Nr!   r#   r   r   r   	<genexpr>Z   s     z#share_grad_blobs.<locals>.<genexpr>r    )Memonger memory optimization took {} secs(Memonger graph is not equal to original.+Inplace assignments differ in memonger net.)logwarnr   copydeepcopyProtosetexternal_outputr   r   r   append	enumerater   addtimeC'memonger_compute_blob_recycling_for_dagSerializeToStringr"   infoformatr   NetDefParseFromStringverify_graph_equalityAssertionErrorverify_inplace_blobs)netZlossesr   r   Zdont_share_blobsZshare_activationsZblob_shapesr   netprotoZactivationsr2   r   r   Zgrad_op_indicesidxZshared_blobs
start_time	optim_stroptimr   )r   r   r   r   share_grad_blobs   sX    




rG   r   c              	      s  t |  }t|  j t|  j fdd}t }t }t|  j}dd t|  jD }|D ]t}	|	j	D ],}
||
rx|
|
 |
|krxtd|
qx|	jD ]}
||
r|
|
 q|t|	j}|	jrntdqnt }t| dd |D |tdd	 |D |d
t i }tdt |  t }|| t|  |shtdt|  |std|S )Nc                    s   |  ko| kS Nr   )r   external_inputr2   r   r   is_activation_blobr   s    z6optimize_inference_for_dag.<locals>.is_activation_blobc                 S   s   g | ]\}}|qS r   r   )r$   indexr   r   r   r   r&   x   s     z.optimize_inference_for_dag.<locals>.<listcomp>z{} not in external inputzCYou can only pass inference-only nets to optimize_inference_for_dagc                 S   s   g | ]}t |d qS r   r!   r#   r   r   r   r&      s     c                 s   s   | ]}t |d V  qdS r'   r!   r#   r   r   r   r(      s     z-optimize_inference_for_dag.<locals>.<genexpr>r    r)   r*   r+   )r.   r/   r0   r1   rJ   r2   r   r   r4   r   r5   r?   r;   r   unionZis_gradient_opr6   r7   r8   r9   r"   r,   r:   r   r<   r=   r>   r@   )rA   input_blobsr   rB   rK   Zactivation_blobsZseen_as_outputopsZ
op_indicesr   r   rD   rE   rF   r   rI   r   optimize_inference_for_dagm   sV    





rP   c                    sb  dd l tjjdtjjdtjjdtjjdtjjdtjjdtjj	dtjj
dtjjdtjjdi
 fddfdd	}fd
d| D } tdd }d}d}d}t }	| D ]}
|
jD ]}|jdks|jdkr|jD ]"}||	kr|||8 }|	| qq|jD ]N}||	kr||}||7 }||7 }t||}|	| ||j  |7  < qqq|||fS )Nr               c                    s0    fdd| j D }| j d d = | j | | S )Nc                    s$   g | ]}|j  ks|jd kr|qS )>   FreeAlias)device_optiontype)r$   r   devicescoper   r   r&      s    
 
z<estimate_memory_usage.<locals>.split_net.<locals>.<listcomp>)r   extend)protorO   rY   r   r   	split_net   s    z(estimate_memory_usage.<locals>.split_netc                    sB   | ks| kr$t d|  dS |   }| |   S )NzUnknown blob encountered: {}r   )r,   warningr;   prod)blobsizeof)npshapessizeofstypesr   r   	num_bytes   s
    z(estimate_memory_usage.<locals>.num_bytesc                    s   g | ]} |qS r   r   )r$   r\   )r]   r   r   r&      s     z)estimate_memory_usage.<locals>.<listcomp>c                   S   s   dS Nr   r   r   r   r   r   <lambda>       z'estimate_memory_usage.<locals>.<lambda>rU   rV   )Znumpyr   ZTensorProtoZDOUBLEFLOATZFLOAT16ZINT32ZINT8ZUINT8ZUINT16ZINT16ZBOOLZINT64collectionsdefaultdictr1   r   rX   r   removemaxr5   )protosrc   re   rZ   rf   Zallocs_by_opsZcurrent_allocatedZmax_allocatedZtotal_allocatedZ	allocatedr\   r   or   nbytesr   )rZ   rb   rc   rd   r]   re   r   estimate_memory_usage   s\              





rr   c              
   C   s6  t  }t  }t  }t| } | jD ]d}|jdkrB||jd  q"|jD ]}|| qH|jD ]&}||kr^|dksz||r^|| q^q"|t | j }|	|}|| }|| }t
| j}	ttdt| jD ]J}
| j|
 }|jD ]4}||kr|| |	|
d td|g|g qq| jdd= | j|	 | S )a  
    Insert Free-ops after a blob has been used the last time, so that its
    memory can be reclaimed. Use this only with efficient caching memory
    managers (such as CUB, --caffe2_cuda_memory_pool=cub).

    Blobs used with Alias op won't be freed.

    @dont_free_blobs:  is a set of blobs that should not be freed
    @selector_fun:     optional lambda that return True if blob name
                       can be released. Use for easy special filtering, like
                       excluding blobs with "loss" in the name.

    Returns a new protobuffer. To use with a model, use:
        model.net._net = memonger.release_blobs_when_used(..)
    rV   r   NrT   rU   )r1   r.   r/   r   rX   r5   r   r   r2   intersectionr   reversedrangelenrm   insertr   ZCreateOperatorr[   )rB   Zdont_free_blobsZselector_funrN   Zcan_releaseZalias_blobsr   inpZoutprO   jr   r   r   release_blobs_when_used   s8    









"rz   c                 C   s.   g }| D ] }t | |}|s|| q|S )z# Return nodes without predecessors )r   Zpredecessorsr3   )gretcnZcur_predr   r   r   _find_source_nodes  s    r~   c                 C   s.   g }| D ] }t | |}|s|| q|S )z! Return nodes without successors )r   
successorsr3   )r{   r|   r}   Zcur_succr   r   r   _find_target_nodes$  s    r   c                 C   sf   t | }t|dkstt|dkr(| S t| }dd }|| }|| |D ]}||| qP|S )NrT   c                 S   s&   d}| D ]}||kr|}q|d7 }|S )NrT   r   )r{   r|   r}   r   r   r   _next_available_idx5  s    z8_add_single_target_ifneeded.<locals>._next_available_idx)r   rv   r?   r.   r/   add_nodeadd_edge)r{   targetsr|   r   Ztarget_node_idxr}   r   r   r   _add_single_target_ifneeded.  s    

r   c                    s   t  fdd D stt  fddd}g }|}|dk	r|| z| | r^| | d nd}W q6 tk
r   | | }Y q6X q6tt|S )z. Get the path from nx.bellman_ford()'s output c                 3   s   | ]} | d kV  qdS )r   Nr   r$   x	dist_listr   r   r(   I  s     z_get_path.<locals>.<genexpr>c                    s    |  S rH   r   r   r   r   r   rh   K  ri   z_get_path.<locals>.<lambda>keyNr   )allr?   minr3   	TypeErrorr   rt   )Z	pred_listr   targetr|   curr   r   r   	_get_pathE  s    
r   c           
      C   s   t | }| D ]\}}d|| | d< qi }|D ]X}tj||dd\}}t||}	|	d |ksftt|	d ||	d   kst|	||< q4|S )zn Get the longest path for nodes in 'source_nodes'
        Find with bellman_ford() by setting weight = -1
    r   weight)r   r   rT   )r.   r/   edgesnxZ%bellman_ford_predecessor_and_distancer   r?   rv   )
r{   source_nodesZnguvr|   r}   preddistpathr   r   r   _get_longest_paths\  s    


r   c                    s   t  fdd D stt }dd  D }||  D ]8}t|dd |dd D ]}||d |d  q\q> d d }t|| ||fS )	z Build a tree for given paths based on common elements.
        Last elements of all paths are the same, which is the root of the tree.
    c                 3   s"   | ]}|d   d d  kV  qdS )r   r   Nr   )r$   cppathsr   r   r(   t  s     z_build_tree.<locals>.<genexpr>c                 S   s   h | ]}|D ]}|qqS r   r   r$   r   yr   r   r   	<setcomp>v  s       z_build_tree.<locals>.<setcomp>r   r   rT   N)r   r?   r   DiGraphZadd_nodes_fromzipr   _compute_tree_height)r   r{   Znode_setr   Zcerootr   r   r   _build_treep  s    

r   c                    s    fdd  | dS )zR Compute the heights of the tree for all nodes
        Height of leaves are 0
    c                    sF   t | }d}|r4 fdd|D }t|d }|j|  d< |S )Nr   c                    s   g | ]} |qS r   r   r   _get_heightr   r   r&     s     z=_compute_tree_height.<locals>._get_height.<locals>.<listcomp>rT   height)r   r   rn   nodes)r   childrenr   child_heightsr   r{   r   r   r     s    z)_compute_tree_height.<locals>._get_heightNr   r{   r   r   r   r   r     s    	r   c                    s$   fdd  fdd|S )z For each node, sort its child nodes based on the height of the nodes.
        Return the leaf nodes of the tree after sorting.
    c                    s    j |  d S )Nr   )r   )r   r{   r   r   r     s    z&_sort_tree_leaves.<locals>._get_heightc                    sl   t | }|s| gS fdd|D  ttt| fddd}g }|D ]}|| }||7 }qN|S )Nc                    s   g | ]} |qS r   r   r   r   r   r   r&     s     zA_sort_tree_leaves.<locals>._get_sorted_leaves.<locals>.<listcomp>c                    s    |  S rH   r   r   r   r   r   rh     ri   z?_sort_tree_leaves.<locals>._get_sorted_leaves.<locals>.<lambda>r   )r   r   sortedru   rv   )r   r   orderr|   coZcrr   _get_sorted_leavesr{   r   r   r     s    z-_sort_tree_leaves.<locals>._get_sorted_leavesr   r   r   r   r   _sort_tree_leaves  s    r   c                    s  t | }t|}t||}ttt|\}}t||}t|t|ksLtt	j
dk rdt	| |}nt| t|}|D ]6}	t	| |	}
|
D ] }||kr||  | qqxt fddt D t	jjj| fddd}t|}t|t| jkst|S )a5   The graph 'g' may contain several source nodes (nodes without incoming
        edge), which could be in any order and still be a valid
        topological sorting result. We would like to arrange these source nodes
        so that the average live spans of the computed blobs are shorter.
        The idea is to sort the source nodes based on the length of their path to
        the target node so that the one with longer path is used first.
        This is done by:
        - Add a single target node if there are multiple target nodes in 'g'.
        - Find the longest path between each source and the target node.
        - Convert the longest paths to a tree with the target node being the root
          and source nodes being the leaves.
        - Sort the nodes of the tree based on the height of the tree.
    z2.0c                 3   s"   | ]\}}|t  | fV  qd S rH   rv   )r$   ir   )dependency_orderr   r   r(     s     z:topological_sort_traversal_longest_path.<locals>.<genexpr>c                    s    |  S rH   r   r   )sort_keyr   r   rh     ri   z9topological_sort_traversal_longest_path.<locals>.<lambda>r   )r   r~   r   r   r   r   r   r   r?   r   __version__topological_sortr1   Zdescendantsr5   r3   dictr4   Z
algorithmsZdagZ lexicographical_topological_sortrv   r   )r{   gtr   Zlpathstreer   Zsorted_sourcesr|   Z
seen_nodesr%   descdr   )r   r   r   'topological_sort_traversal_longest_path  s0    



 
r   c                 C   s   t t| S rH   )r   r   r   r   r   r   r   topological_sort_traversal  s    r   c           	      C   s"  |st d tdd }t| D ]\}}|jD ]n}|| j}|d krN|}n
t||}|| j|d||< |rx|| nd }|r|d k	st	|| j|d||< q2|j
D ]r}|| j}|d kr|}n
t||}|| j|d||< |r|| nd }|r|d k	st	|| j|d||< qq$|S )Nz4Provide blob sizes to get more accurate assignments.c                   S   s   t d d d dS )N)r   r	   r
   )r   r   r   r   r   rh     ri   z compute_ranges.<locals>.<lambda>r	   r
   r   )r,   r^   rk   rl   r4   r   r	   rn   _replacer?   r   r   r   )	linearized_ops
blob_sizesblobsr   r   r`   r	   Z	blob_sizer   r   r   r   compute_ranges  s2    






r   c                 C   sF   |d \}}||krdS | j d ks6|j d ks6|jd kr:dS | j |jkS )Nr   F)r   r	   )candidate_range
assignmentstatic_blobsr   range_r   r   r   is_compatible  s    r   c                 C   sB   i }| D ]4}t |dkrq|d \}}|D ]\}}|||< q*q|S )NrT   r   r   )assignmentsblob_assignmentsr   Z	last_blobr   r`   r   r   r   compute_blob_assignments  s    r   c                 C   s.   | sdS t dd | D }|d kr&dn|}|S )Nr   c                 S   s   g | ]}|d  j qS rT   r   r   r   r   r   r&     s     z!_get_max_size.<locals>.<listcomp>rn   )r   r|   r   r   r   _get_max_size
  s
    r   c                 C   s   d}| D ]}|t |7 }q|S rg   )r   )r   r|   r   r   r   r   get_memory_usage  s    r   c                 C   s   |pg }dd |D }| D ]\}}||kr,qd}d}t d}|jpDd}	t|D ]8\}
}t||g rNd}tt||	 }||k rN|}|
}qN|r|| }|||f q|||fg q|S )Nc                 S   s   h | ]}|D ]}|d  qqS r   r   r   r   r   r   r     s       z-compute_assignments_greedy.<locals>.<setcomp>Fr   infT)floatr
   r4   r   absr   r3   )ranges_sortedZinit_assignmentsr   visitedr   r   assignedbest_assignmentZmin_distZcandidate_sizerC   r   r   r   r   r   compute_assignments_greedy  s*    
r   c                 C   s   | rt dd | D S dS )z' Return number of blobs in assignments c                 S   s   g | ]}t |qS r   r   r   r   r   r   r&   6  s     z_get_count.<locals>.<listcomp>r   )sum)r   r   r   r   
_get_count3  s    r   c                 C   sB  dd }dd }|sdg}|d  d7  < |rz|d d dkrz| d d j | d d jg}td	|d |d |d  |pg }g }t| D ]\}}||||}	|	dk rt|nt||	 }
| |	d |d  }|||
|r|d n||}t|t|
t	| kst
|t| qt	|t	| ks6t
|d }|S )
aw   Compute assignment for blobs in 'ranges_sorted' on top of 'init_assignment'
        using dynamic programming + recursion.

        ranges_sorted: blobs sorted by 'used'
        init_assignment: assignment to start with, blobs in 'ranges_sorted' should
                         not be used in 'init_assignment'

        Using f(b, k, init) to represent the best assignment for blobs b[0:k]
        given initial assignment 'init', we have
            f(b, k, init) = f(b, j, init) +
                            find_best(b[j:k], f(b, j, init))
        where j is the index of the last best assignment that is independent of
        blob b[k - 1] (b[k - 1] is compatible with all assignments in
        f(b, j, init)), and find_best(b1, init1) gives the best assignment
        for blobs in 'b1' based on the initial assignment 'init1', and blobs
        b1[0:-1] should be incompatible with b1[-1]. f(b, len(b), []) gives
        the best assignment for blobs 'b'.

        For find_best(b, init), since b[0:-1] are not compatible with b[-1], we
        could reduce it to a smaller problem to find best assignment for b[0:-1]
        as
            find_best(b, init) = min {
                f(b[0:-1], len(b) - 1, init - x) + [x, b[-1]] for x in init, or
                f(b[0:-1], len(b) - 1, init) + [b[-1]]
            }
        where min{} gives the assignment with minimum memory usage.
    c                 S   s<   dd }|d }|dkr8|| }|| |r.|S |d8 }qdS )z Find closest position k of best_assignments that is independent of
            candidate_range that candiate_range is compatible with all assignments
            in best_assignments[k].
            Return -1 if not found.
        c                    s   t  fdd|D S )z> return true if compatible for all assignments in assignments c                    s   g | ]}t  d  |g qS r   r   r   r   r   r   r&   _  s     zccompute_assignments_dp.<locals>._get_compatible_prev.<locals>.is_compatible_all.<locals>.<listcomp>)r   )r   r   r   r   r   is_compatible_all]  s    zOcompute_assignments_dp.<locals>._get_compatible_prev.<locals>.is_compatible_allrT   r   r   r   )r   best_assignmentsZcur_idxr   iiZcbar   r   r   _get_compatible_prevW  s    

z4compute_assignments_dp.<locals>._get_compatible_prevc           	         s   | d  t  fdd| dd D s*tt|}g }t|D ]t d | g sXq>t|}|   t| dkrfddt|D }t	| dd ||}|| g }|| q>|| gg  t
|d	d
 d}|S )a*   Find the best assignment for blobs 'ranges' given an initialized
            assignment 'init_assignment'.

            Blobs in ranges[0:-1] should be incompatible with blob range[-1].
            'prev_best_assignment': best assignment for blobs in ranges[:-1]

            By assigning ranges[-1] to each assignment k in 'init_assignment' or
            in a new assignment, the problem becomes a smaller problem to find
            the best assignment for ranges[0:-1] given the initial assignment
            init_assigment[0:k, (k+1):-1].
        r   c                 3   s"   | ]}t |d   gg  V  qdS rT   Nr   r   )
find_ranger   r   r(   y  s     z=compute_assignments_dp.<locals>._find_best.<locals>.<genexpr>r   rT   c                    s   g | ]\}}| kr|qS r   r   )r$   r   r   )r   r   r   r&     s      z>compute_assignments_dp.<locals>._find_best.<locals>.<listcomp>Nc                 S   s   t | S rH   )r   r   r   r   r   rh     ri   z<compute_assignments_dp.<locals>._find_best.<locals>.<lambda>r   )r   r?   rv   ru   r   r.   r/   r3   r4   compute_assignments_dpr   )	rangesinit_assignmentZprev_best_assignmentcounterszZbest_candidatescur_bestZcur_best_tmpr|   r   )r   r   r   
_find_besti  s*    "

  z*compute_assignments_dp.<locals>._find_bestr   rT   i  r   z$Finding assignments {} ({} -> {})...)r   r	   r,   r:   r;   r4   r.   r/   r   rv   r?   r3   )r   r   r   r   r   rsr   r   Z	cur_rangeZprev_idxZ	prev_bestZranges_partr   bestr   r   r   r   :  s>    '   r   c                    s8   dd }dd  dkr ||  fdd| D } | S )z Set LiveRange.defined = -1 if it is None
        Set LiveRange.used = max_live if it is None
        Set LiveRanee.size = 1 if it is None
    c                 S   s   t dd | D d }|S )Nc                 s   s"   | ]}|d  j r|d  j V  qdS r   r   r   r   r   r   r(     s     
 z<get_updated_ranges.<locals>._get_max_live.<locals>.<genexpr>rT   r   )r   max_liver   r   r   _get_max_live  s    z)get_updated_ranges.<locals>._get_max_livec                 S   sz   | }| d j d kr*|d |d jddf}| d jd krP|d |d j|df}| d jd krv|d |d j|df}|S )NrT   r   r   r   r   r   )r   r   r	   r
   )r   r   r
   Zcxr   r   r   _update_range  s    z)get_updated_ranges.<locals>._update_rangeNc                    s   g | ]} |d qS r   r   r   r   r   r   r   r&     s     z&get_updated_ranges.<locals>.<listcomp>r   )r   r   r   r   r   r   get_updated_ranges  s    
r   c                    s   t t| dd d} t| }  fdd| D } fdd| D }tdt| g }|tjkrnt	|g }n$|tj
krt|g }nd|st|d	d |D 7 }|S )
a]  
    algo: Method used to find assignments (AssignmentAlgorithm.GREEDY or
          AssignmentAlgorithm.DYNAMIC_PROGRAMMING).
          AssignmentAlgorithm.DYNAMIC_PROGRAMMING gives optimal solution at the
          cost of more computation.
          AssignmentAlgorithm.GREEDY may be better in the case 'blob_sizes' is
          not provided.
    c                 S   s   | d j d k| d j fS )NrT   r   )pr   r   r   rh     ri   z%compute_assignments.<locals>.<lambda>r   c                    s   g | ]}|d   kr|qS r   r   r   r   r   r   r&     s      z'compute_assignments.<locals>.<listcomp>c                    s   g | ]}|d   kr|qS r   r   r   r   r   r   r&     s      zTotal sharable blobs {}zInvalid algo name {}c                 S   s   g | ]
}|gqS r   r   r   r   r   r   r&     s     )r   r   r   r,   r:   r;   rv   AssignmentAlgorithmDYNAMIC_PROGRAMMINGr   GREEDYr   r?   )r   r   algoZranges_sharableZranges_staticr   r   r   r   compute_assignments  s     

r  c                 C   sJ   | D ]@}t |dd |dd  D ] \}}|d j|d jk s"tq"qd S )Nr   r   rT   )r   r	   r   r?   )r   r   r   r   r   r   r   verify_assignments  s    "r  c                    s   t  }t| D ]\}}|j||d qt| D ]p\}}t| D ]^\} ||krRq@t fdd|jD r@t j|j}|j	|||d t 
|s@t q@q0|S )Nr   c                 3   s   | ]}| j kV  qd S rH   )r   )r$   r   Zchild_opr   r   r(     s     z-compute_interference_graph.<locals>.<genexpr>)deps)r   r   r4   r   anyr   r1   r   rs   r   Zis_directed_acyclic_graphr?   )rO   r{   r   r   Z	parent_opry   r  r   r  r   compute_interference_graph  s    r  OptimizationrA   r   r   c                    sx    fdd}| j D ]`}|jdr.t| | t|jD ]\}}|||j|< q8t|jD ]\}}|||j|< qZqd S )Nc                    s   |  kr| S  |  S rH   r   )r`   r   r   r   canonical_name  s    z)apply_assignments.<locals>.canonical_nameZRecurrentNetwork)r   rX   r    apply_recurrent_blob_assignmentsr4   r   r   )rA   r   r
  r   r   Zinput_r   r   r	  r   apply_assignments  s    
r  c                 C   sL  t d| j dd | jD }|D ].}t|jD ]\}}||  |j|< q4q&dd | jD }|D ].}t|jD ]\}}||  |j|< qxqjdd | jD }	|	D ]>}
t	|
j
| t|
j
jD ] \}}||kr|||
j
j|< qqt|D ]P\}}|t| jt| j krt }|d |_t|d|_| j|g qd S )Nz(Applying assignments to recurrent op: {}c                 S   s   g | ]}|j d r|qS )	alias_dstr   r   r$   ar   r   r   r&   %  s      z4apply_recurrent_blob_assignments.<locals>.<listcomp>c                 S   s   g | ]}|j d r|qS )link_externalr  r  r   r   r   r&   +  s      c                 S   s   g | ]}|j d r|qS )Zstep_netr  r  r   r   r   r&   1  s      z.renameascii)r,   debugr;   rX   argr4   stringsdecoder"   r  nrJ   r   r   r   r   r   ZArgumentr   r   r%   r[   )r   r   r
  Zalias_dst_argsr  r   r`   Zlink_external_argsr  Z	step_argsZstep_argZeinpZrenamedr  r   r   r   r  !  s*    
r  c                   @   s   e Zd ZdZdZdS )r   r   rT   N)__name__
__module____qualname__r   r   r   r   r   r   r   A  s   r   c                 C   s0   t  }t|  dd |D }|| |S )Nc                 S   s   g | ]}t |d qS r   r!   r#   r   r   r   r&   J  s     z+optimize_inference_fast.<locals>.<listcomp>)r   r<   r7   Zmemonger_optimize_inference_netr9   r=   )rA   r   rF   rE   r   r   r   optimize_inference_fastF  s    
r  c                    s|   t   t j}||} fdd|D } jdd=  j| t||}t|||}	t|	}
t |
 t	 |
|	dS )a_  
    ordering_function: topological_sort_traversal or
                       topological_sort_traversal_longest_path.
                       topological_sort_traversal_longest_path gives better
                       results but needs a bit more computation.
    algo: Method used to find assignments (AssignmentAlgorithm.GREEDY or
          AssignmentAlgorithm.DYNAMIC_PROGRAMMING).
          AssignmentAlgorithm.DYNAMIC_PROGRAMMING gives optimal solution at the
          cost of more computation.
          AssignmentAlgorithm.GREEDY may be better in the case 'blob_sizes' is
          not provided.
    c                    s   g | ]} j | qS r   r  )r$   r   rA   r   r   r&   m  s     z)optimize_interference.<locals>.<listcomp>N)rA   r   r   )
r.   r/   r  r   r[   r   r  r   r  r  )rA   r   Zordering_functionr   r   r{   Zorderingr   r   r   r   r   r  r   optimize_interferenceP  s    



r  c                 C   sL   dd }t | j|jD ]0\}}|j|jkr0 dS ||||kr dS qdS )z
    Verifies that net_a and net_b have the same in-place blob assignments.
    Particularly, that memonger did not add an in-place assignment when that
    did not exist before.
    c                 S   sB   t | j}g }t| jD ]$\}}||kr||||g q|S rH   )r   r   r4   r   r3   rL   )r   outZinplacesry   rx   r   r   r   get_inplaces  s    
z*verify_inplace_blobs.<locals>.get_inplacesFT)r   r   rX   )net_anet_br  op_aop_br   r   r   r@     s    r@   c           
      C   s   dd }t | jt |jkr dS t| j|jD ]2\}}|j|jksZ|j|jksZ|j|jkr. dS q.|| j}||j}||krd}t||D ]F\}}	||	krtd|| j| |j|  td||	 |d7 }q||kS )a  
    Determines if the execution of two graphs are identical.
    That is, all inputs blobs are mapped to the same output blobs
    for each operator in their respective positions.

    This is meant to check the output of memonger with the original graph.
    It assumes that the nets have same external input and output.

    O(E) runtime + O(1) amortized cost to hash for python dict
    c                 S   sh   dd | D }i }t | D ]H\}}|jD ]$}||}|d k	r(|| | q(|jD ]}|||< qTq|S )Nc                 S   s   g | ]}g qS r   r   )r$   r   r   r   r   r&     s     z>verify_graph_equality.<locals>.parent_list.<locals>.<listcomp>)r4   r   getr3   r   )rO   parent_listZ
edge_ownerr   r   r`   Z	parent_idr   r   r   r%    s    


z*verify_graph_equality.<locals>.parent_listFr   zDifference {} vs {} 
 {}zParents: {} vs {}rT   )rv   r   r   rX   rW   Zengineprintr;   )
r   r!  r%  r"  r#  Zparent_list_aZparent_list_bry   r  r   r   r   r   r>     s0    



  
r>   
Statisticsbaseline_nbytesoptimized_nbytesc                 C   s>   d}zt | j}W n$ tk
r8   td|  Y nX |S )Nr   zError when fetching blob {})r   Z	FetchBlobrq   	Exceptionr,   r^   r;   )r`   r   r   r   r   blob_nbytes  s    r+  c                    s<   dd | D  t t }t  fdd| D }t||dS )Nc                 S   s$   i | ]}|D ]\}}|t |qqS r   )r+  )r$   r   r`   r   r   r   r   
<dictcomp>  s
     z&compute_statistics.<locals>.<dictcomp>c                 3   s$   | ]}t  fd d|D V  qdS )c                 3   s   | ]\}} | V  qd S rH   r   )r$   r`   r   Z
blob_bytesr   r   r(     s     z/compute_statistics.<locals>.<genexpr>.<genexpr>Nr   )r$   r   r-  r   r   r(     s   z%compute_statistics.<locals>.<genexpr>)r(  r)  )r   r   r'  )r   r(  r)  r   r-  r   compute_statistics  s    r.  c                 C   sD   i }| j D ]4}|jD ]}t|||< q|jD ]}t|||< q,q
|S rH   )r   r   r+  r   )rA   r   r   r`   r   r   r   collect_blob_sizes  s    


r/  )NFN)r   )N)N)N)N)N)AZnetworkxr   rk   r6   r.   Zcaffe2.pythonr   r   Zcaffe2.protor   enumloggingZfuture.utilsr   r   Z!caffe2.python._import_c_extensionpythonZ_import_c_extensionr7   	getLoggerr,   setLevelINFO
namedtupler   rG   rP   rr   rz   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  Enumr   r  r   r  r@   r>   r'  r+  r.  r/  r   r   r   r   <module>   s|   
   
U
4D
5

)



x
*  
03 	