from typing import Any, Dict, List, Optional, Set, Tuple, Union, Type, Callable
from torch.ao.quantization.quant_type import QuantType
import torch
import copy
import operator
import warnings
from torch.fx import GraphModule
from torch.fx.graph import Graph, Node, Argument
from ..utils import (
    activation_is_statically_quantized,
    weight_is_quantized,
    get_qparam_dict,
    _parent_name,
    get_swapped_custom_module_class,
)
from ..qconfig import QConfigAny, qconfig_equals
from ..qconfig_mapping import QConfigMapping
from .qconfig_mapping_utils import (
    _generate_node_name_to_qconfig,
    _compare_prepare_convert_qconfig_mappings,
    _update_qconfig_for_fusion,
    _is_qconfig_supported_by_dtype_configs,
    _update_qconfig_for_qat,
)
from torch.ao.quantization.backend_config.utils import (
    get_root_module_to_quantized_reference_module,
    get_pattern_to_dtype_configs,
    get_fused_module_classes,
    get_qat_module_classes,
)
from torch.ao.quantization.backend_config import (
    BackendConfig,
    get_native_backend_config,
)
from torch.ao.quantization.observer import _is_activation_post_process
from .graph_module import (
    _is_observed_module,
    _is_observed_standalone_module,
)
from ._equalize import update_obs_for_equalization, convert_eq_obs
from torch.nn.utils.parametrize import type_before_parametrizations
from .utils import (
    _get_module,
    _is_custom_module_lstm,
    _is_custom_module_mha,
    assert_and_get_unique_device,
    get_custom_module_class_keys,
    create_getattr_from_value,
    collect_producer_nodes,
    graph_module_from_producer_nodes,
    node_arg_is_weight,
)
from torch.ao.quantization.utils import is_per_channel, to_underlying_dtype
from torch.ao.quantization.quantize import _remove_qconfig
from torch.ao.quantization.stubs import DeQuantStub
from .custom_config import ConvertCustomConfig, PrepareCustomConfig
from .lower_to_fbgemm import lower_to_fbgemm
from ._decomposed import quantized_decomposed_lib  # noqa: F401

__all__ = [
    "convert",
    "convert_custom_module",
    "convert_standalone_module",
    "convert_weighted_module",
]

_QSCHEME_TO_CHOOSE_QPARAMS_OP = {
    torch.per_tensor_affine: torch.ops.quantized_decomposed.choose_qparams.tensor,
    torch.per_tensor_symmetric: torch.ops.quantized_decomposed.choose_qparams_symmetric.tensor,
}


def _replace_observer_with_quantize_dequantize_node_decomposed(
        model: torch.fx.GraphModule,
        graph: Graph,
        node: Node,
        modules: Dict[str, torch.nn.Module],
        node_name_to_scope: Dict[str, Tuple[str, type]],
        node_name_to_qconfig: Dict[str, QConfigAny]) -> None:
tj6krXt7ddS )av   Replace activation_post_process module call node with quantize and
    dequantize node working with decomposed Tensor

    Before:
    ... -> observer_0(x) -> ...
    After:
    ... -> torch.ops.quantized_decomposed.quantize_per_tensor(x, ...) ->
    torch.ops.quantized_decomposed.dequantize_per_tensor() -> ...

    or quantize_per_channel and dequantize_per_channel
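

# Illustrative sketch (not used by the pass itself): what the rewritten graph
# computes in the static per-tensor case described above. It assumes a PyTorch
# build that registers the `quantized_decomposed` ops (imported via
# `._decomposed`); the qparams below are placeholders, not values produced by
# a real observer.
def _example_decomposed_quant_dequant(x: torch.Tensor) -> torch.Tensor:
    scale, zero_point = 0.1, 0
    quant_min, quant_max, dtype = -128, 127, torch.int8
    # quantize to an int8 tensor, then immediately dequantize back to float
    q = torch.ops.quantized_decomposed.quantize_per_tensor(
        x, scale, zero_point, quant_min, quant_max, dtype)
    return torch.ops.quantized_decomposed.dequantize_per_tensor(
        q, scale, zero_point, quant_min, quant_max, dtype)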


def _replace_observer_with_quantize_dequantize_node(
        model: torch.fx.GraphModule,
        graph: Graph,
        node: Node,
        modules: Dict[str, torch.nn.Module],
        node_name_to_scope: Dict[str, Tuple[str, type]],
        node_name_to_qconfig: Dict[str, QConfigAny]) -> None:
i}||l |jd }|g}| D ]\}}|| q| ||t!|i }|j"d|fd}|| || W 5 Q R X dS )z Replace activation_post_process module call node with quantize and
    dequantize node

    Before:
    ... -> observer_0(x) -> ...
    After:
    ... -> torch.quantize_per_tensor(x, ...) -> x.dequantize() -> ...
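

# Illustrative sketch (not part of the pass): the non-decomposed rewrite above
# uses the regular quantized-tensor API. The qparams here are placeholders
# standing in for the observer's calculate_qparams() output.
def _example_quant_dequant(x: torch.Tensor) -> torch.Tensor:
    q = torch.quantize_per_tensor(x, scale=0.1, zero_point=0, dtype=torch.quint8)
    return q.dequantize()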


def _replace_observer_or_dequant_stub_with_dequantize_node(node: Node, graph: Graph) -> None:
    ...


def _is_conversion_supported(activation_post_process: torch.nn.Module) -> bool:
    ...


def _has_none_qconfig(node: Argument, node_name_to_qconfig: Dict[str, QConfigAny]) -> bool:
    """ Check if a node has a qconfig of None, i.e. the user requested to not quantize
    the node
    """
    ...


def _run_weight_observers(observed: GraphModule, backend_config: BackendConfig) -> None:
    """ Extract the subgraph that produces the weight for a dynamic quant
    or weight only quant node and run the subgraph to observe the weight.
    Note that the observers of dynamic quant or weight only quant ops are
    run during the convert step.
    """
    ...


def _maybe_recursive_remove_dequantize(arg: Any, node: Node, graph: Graph) -> None:
    """ If the arg is a dequantize Node, or a list/tuple/dict of dequantize Nodes,
    we'll recursively remove the dequantize Node
    """
    ...


def _get_module_path_and_prefix(
        obs_node: Node,
        node_name_to_scope: Dict[str, Tuple[str, type]],
        node_name_to_qconfig: Dict[str, QConfigAny]) -> Tuple[str, str]:
    """ Given an observer node, get the `Scope` or the fully qualified name for
    the submodule containing the observed node, also return a prefix of "_input"
    when the observed node is an input of a F.linear op, and not the output of another
    quantized op.
    TODO: this logic is hacky, we should think about how to remove it or make it more
    general
    """
    ...


def _insert_dequantize_node(node: Node, graph: Graph) -> None:
    """ Inserts a dequantize node for `node` in `graph`
    """
    ...


def _maybe_get_observer_for_node(
        node: Node,
        modules: Dict[str, torch.nn.Module]) -> Optional[torch.nn.Module]:
    """
    If the node is observed, return the observer
    instance. Otherwise, return None.
    """
    ...


def convert_standalone_module(
        node: Node,
        modules: Dict[str, torch.nn.Module],
        model: torch.fx.GraphModule,
        is_reference: bool,
        backend_config: Optional[BackendConfig]) -> None:
    """ Converts an observed standalone module to a quantized standalone module by calling
    the fx convert api, currently using the same `is_reference` flag as the parent, but we may
    change this behavior in the future (e.g. separating quantization and lowering for
    standalone modules as well)

    Args:
      - node: The call_module node of the observed standalone module
      - modules: named_module of original model
      - model: original model
      - is_reference: a flag from parent provided by user to decide if we want to
        produce a reference model or a fbgemm/qnnpack model
      - backend_config: backend configuration of the target backend of quantization
    """
    ...


def convert_weighted_module(
        node: Node,
        modules: Dict[str, torch.nn.Module],
        observed_node_names: Set[str],
        node_name_to_qconfig: Dict[str, QConfigAny],
        backend_config: BackendConfig,
        is_decomposed: bool = False,
        is_reference: bool = False) -> None:
    """ Convert a weighted module to a reference quantized module in the model
    If the QConfig of a QAT module is not set, the module will still be converted to
    a float module.

    Args:
      - node: The call_module node of the observed weighted module
      - modules: named_module of original model
      - observed_node_names: names for the set of observed fx nodes, we can skip
        this conversion if the node is not observed
    """
    ...
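

# Illustrative sketch of the swap this function performs for a simple case.
# It assumes the native backend config maps nn.Linear to the reference
# quantized Linear; the weight qparams below are placeholders standing in for
# what the weight observer / fake-quant would report.
def _example_weighted_module_swap(float_linear: torch.nn.Linear) -> torch.nn.Module:
    root_to_ref = get_root_module_to_quantized_reference_module(get_native_backend_config())
    ref_linear_cls = root_to_ref[torch.nn.Linear]
    weight_qparams = {
        "qscheme": torch.per_tensor_affine,
        "dtype": torch.qint8,
        "scale": 1.0,
        "zero_point": 0,
    }
    # build the reference quantized module from the float module and qparams
    return ref_linear_cls.from_float(float_linear, weight_qparams)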


def _remove_previous_dequantize_in_custom_module(node: Node, prev_node: Node, graph: Graph) -> None:
    """
    Given a custom module `node`, if the previous node is a dequantize, reroute the custom module as follows:

    Before: quantize - dequantize - custom_module
    After: quantize - custom_module
                 \ - dequantize
    """
    ...


def convert_custom_module(
        node: Node,
        graph: Graph,
        modules: Dict[str, torch.nn.Module],
        custom_module_class_mapping: Dict[QuantType, Dict[Type, Type]],
        statically_quantized_custom_module_nodes: Set[Node]) -> None:
    """ Converts an observed custom module to a quantized custom module based on
    `custom_module_class_mapping`
    For static quantization, we'll also remove the previous `dequantize` node and
    attach the observer node for output to the module, the observer for the node
    will be converted to a dequantize node instead of quantize-dequantize pairs
    later in the graph. In the end we would have a quantized custom module that
    has the same interface as a default quantized module in the nn.quantized namespace,
    i.e. quantized input and quantized output.

    Args:
      - node: The call_module node of the observed custom module
      - graph: The graph containing the node
      - modules: named_module of original model
      - custom_module_class_mapping: mapping from observed custom module class to
        quantized custom module class, used to swap custom modules
      - statically_quantized_custom_module_nodes: we'll add the custom module node
        if we find it is statically quantized, this will be used later when converting
        observers to quant/dequant node pairs, if the observed node is a statically
        quantized custom module node, we'll convert the observer to a dequantize node,
        this is to keep the interface the same as the default quantized module.
        TODO: maybe we want to redesign this part to align with reference model design
        as well, but there have been some discussions around the interface, so we can do
        it later.
    """
    ...
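

# Illustrative sketch of where `custom_module_class_mapping` comes from: users
# register an observed -> quantized class pair on the ConvertCustomConfig that
# is passed down to `convert`. The two classes are supplied by the caller.
def _example_convert_custom_config(
        observed_cls: Type[torch.nn.Module],
        quantized_cls: Type[torch.nn.Module]) -> ConvertCustomConfig:
    return ConvertCustomConfig().set_observed_to_quantized_mapping(
        observed_cls, quantized_cls, QuantType.STATIC)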


def convert(
        model: GraphModule,
        is_reference: bool = False,
        convert_custom_config: Union[ConvertCustomConfig, Dict[str, Any], None] = None,
        is_standalone_module: bool = False,
        _remove_qconfig_flag: bool = True,
        qconfig_mapping: Union[QConfigMapping, Dict[str, Any], None] = None,
        backend_config: Union[BackendConfig, Dict[str, Any], None] = None,
        is_decomposed: bool = False) -> GraphModule:
    """
    We will convert an observed model (a module with observer calls) to a reference
    quantized model, the rule is simple:
    1. for each observer module call in the graph, we'll convert it to calls to
       quantize and dequantize functions based on the observer instance
    2. for weighted operations like linear/conv, we need to convert them to reference
       quantized modules, this requires us to know whether the dtype configured for the
       weight is supported in the backend, this is done in the prepare step and the result
       is stored in observed_node_names, we can decide whether we need to swap the
       module based on this set

    Args:
       * `is_standalone_module`: when this flag is True, it means we are quantizing
       a submodule that is not inlined in the parent module, and it will be quantized
       separately as one unit.

       * `is_decomposed`: a boolean flag to indicate whether we want to use the
        quantize operator for decomposed quantized tensor
        (torch.ops.quantized_decomposed.quantize_per_tensor) or default/standalone
        quantized tensor (torch.quantize_per_tensor)

    Returns:
         a quantized standalone module, whether input/output is quantized is
         specified by prepare_custom_config, with
         input_quantized_idxs, output_quantized_idxs, please
         see docs for :func:`~torch.ao.quantization.prepare_fx` for details
    """
    ...
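

# Illustrative end-to-end sketch (not executed here): `convert` is normally
# reached through the public convert_fx / convert_to_reference_fx wrappers on a
# model that went through prepare_fx and calibration. `example_inputs` is a
# placeholder the caller provides.
def _example_prepare_convert_flow(
        float_model: torch.nn.Module,
        example_inputs: Tuple[Any, ...]) -> GraphModule:
    from torch.ao.quantization import get_default_qconfig_mapping
    from torch.ao.quantization.quantize_fx import prepare_fx, convert_to_reference_fx

    qconfig_mapping = get_default_qconfig_mapping()
    prepared = prepare_fx(float_model, qconfig_mapping, example_inputs)
    prepared(*example_inputs)  # calibration would normally run over real data
    return convert_to_reference_fx(prepared)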