import collections
import types

import numpy as np

from ..utils import (
    add_end_docstrings,
    is_tensorflow_probability_available,
    is_tf_available,
    is_torch_available,
    requires_backends,
)
from .base import PIPELINE_INIT_ARGS, ArgumentHandler, Dataset, Pipeline, PipelineException


if is_torch_available():
    import torch

    from ..models.auto.modeling_auto import (
        MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
        MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES,
    )

if is_tf_available() and is_tensorflow_probability_available():
    import tensorflow as tf
    import tensorflow_probability as tfp

    from ..models.auto.modeling_tf_auto import (
        TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
        TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES,
    )


class TableQuestionAnsweringArgumentHandler(ArgumentHandler):
    """
    Handles arguments for the TableQuestionAnsweringPipeline
    """

    def __call__(self, table=None, query=None, **kwargs):
        # Normalizes the accepted input formats into a list of
        # {"table": pd.DataFrame, "query": str or List[str]} dictionaries.
        requires_backends(self, "pandas")
        import pandas as pd

        if table is None:
            raise ValueError("Keyword argument `table` cannot be None.")
        elif query is None:
            if isinstance(table, dict) and table.get("query") is not None and table.get("table") is not None:
                tqa_pipeline_inputs = [table]
            elif isinstance(table, list) and len(table) > 0:
                if not all(isinstance(d, dict) for d in table):
                    raise ValueError(
                        f"Keyword argument `table` should be a list of dict, but is {(type(d) for d in table)}"
                    )

                if table[0].get("query") is not None and table[0].get("table") is not None:
                    tqa_pipeline_inputs = table
                else:
                    raise ValueError(
                        "If keyword argument `table` is a list of dictionaries, each dictionary should have a `table` "
                        f"and `query` key, but only dictionary has keys {table[0].keys()} `table` and `query` keys."
                    )
            elif Dataset is not None and isinstance(table, Dataset) or isinstance(table, types.GeneratorType):
                return table
            else:
                raise ValueError(
                    "Invalid input. Keyword argument `table` should be either of type `dict` or `list`, but "
                    f"is {type(table)})"
                )
        else:
            tqa_pipeline_inputs = [{"table": table, "query": query}]

        for tqa_pipeline_input in tqa_pipeline_inputs:
            if not isinstance(tqa_pipeline_input["table"], pd.DataFrame):
                if tqa_pipeline_input["table"] is None:
                    raise ValueError("Table cannot be None.")

                tqa_pipeline_input["table"] = pd.DataFrame(tqa_pipeline_input["table"])

        return tqa_pipeline_inputs


@add_end_docstrings(PIPELINE_INIT_ARGS)
class TableQuestionAnsweringPipeline(Pipeline):
    """
    Table Question Answering pipeline using a `ModelForTableQuestionAnswering`. This pipeline is available in
    PyTorch, and in TensorFlow when `tensorflow_probability` is installed.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> oracle = pipeline(model="google/tapas-base-finetuned-wtq")
    >>> table = {
    ...     "Repository": ["Transformers", "Datasets", "Tokenizers"],
    ...     "Stars": ["36542", "4512", "3934"],
    ...     "Contributors": ["651", "77", "34"],
    ...     "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    ... }
    >>> oracle(query="How many stars does the transformers repository have?", table=table)
    {'answer': 'AVERAGE > 36542', 'coordinates': [(0, 1)], 'cells': ['36542'], 'aggregator': 'AVERAGE'}
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This tabular question answering pipeline can currently be loaded from [`pipeline`] using the following task
    identifier: `"table-question-answering"`.

    The models that this pipeline can use are models that have been fine-tuned on a tabular question answering task.
    See the up-to-date list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=table-question-answering).
    """

    default_input_names = "table,query"

    def __init__(self, args_parser=TableQuestionAnsweringArgumentHandler(), *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._args_parser = args_parser

        if self.framework == "tf":
            mapping = TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES.copy()
            mapping.update(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES)
        else:
            mapping = MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES.copy()
            mapping.update(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES)
        self.check_model_type(mapping)

        self.aggregate = bool(getattr(self.model.config, "aggregation_labels", None)) and bool(
            getattr(self.model.config, "num_aggregation_labels", None)
        )
        self.type = "tapas" if hasattr(self.model.config, "aggregation_labels") else None

    def batch_inference(self, **inputs):
        return self.model(**inputs)

    def sequential_inference(self, **inputs):
        """
        Inference used for models that need to process sequences in a sequential fashion, like the SQA models which
        handle conversational queries related to a table.
        """
        if self.framework == "pt":
            all_logits = []
            all_aggregations = []
            prev_answers = None
            batch_size = inputs["input_ids"].shape[0]
            input_ids = inputs["input_ids"].to(self.device)
            attention_mask = inputs["attention_mask"].to(self.device)
            token_type_ids = inputs["token_type_ids"].to(self.device)
            token_type_ids_example = None

            for index in range(batch_size):
                # If a previous sequence was already processed, rebuild the "previous answer" token type IDs
                # from the cells predicted for that sequence.
                if prev_answers is not None:
                    prev_labels_example = token_type_ids_example[:, 3]  # shape (seq_len,)
                    model_labels = np.zeros_like(prev_labels_example.cpu().numpy())  # shape (seq_len,)

                    token_type_ids_example = token_type_ids[index]  # shape (seq_len, 7)
                    for i in range(model_labels.shape[0]):
                        segment_id = token_type_ids_example[:, 0].tolist()[i]
                        col_id = token_type_ids_example[:, 1].tolist()[i] - 1
                        row_id = token_type_ids_example[:, 2].tolist()[i] - 1

                        if row_id >= 0 and col_id >= 0 and segment_id == 1:
                            model_labels[i] = int(prev_answers[(col_id, row_id)])

                    token_type_ids_example[:, 3] = torch.from_numpy(model_labels).type(torch.long).to(self.device)

                input_ids_example = input_ids[index]
                attention_mask_example = attention_mask[index]  # shape (seq_len,)
                token_type_ids_example = token_type_ids[index]  # shape (seq_len, 7)
                outputs = self.model(
                    input_ids=input_ids_example.unsqueeze(0),
                    attention_mask=attention_mask_example.unsqueeze(0),
                    token_type_ids=token_type_ids_example.unsqueeze(0),
                )
                logits = outputs.logits

                if self.aggregate:
                    all_aggregations.append(outputs.logits_aggregation)

                all_logits.append(logits)

                dist_per_token = torch.distributions.Bernoulli(logits=logits)
                probabilities = dist_per_token.probs * attention_mask_example.type(torch.float32).to(
                    dist_per_token.probs.device
                )

                coords_to_probs = collections.defaultdict(list)
                for i, p in enumerate(probabilities.squeeze().tolist()):
                    segment_id = token_type_ids_example[:, 0].tolist()[i]
                    col = token_type_ids_example[:, 1].tolist()[i] - 1
                    row = token_type_ids_example[:, 2].tolist()[i] - 1
                    if col >= 0 and row >= 0 and segment_id == 1:
                        coords_to_probs[(col, row)].append(p)

                prev_answers = {key: np.array(coords_to_probs[key]).mean() > 0.5 for key in coords_to_probs}

            logits_batch = torch.cat(tuple(all_logits), 0)

            return (logits_batch,) if not self.aggregate else (logits_batch, torch.cat(tuple(all_aggregations), 0))
        else:
            all_logits = []
            all_aggregations = []
            prev_answers = None
            batch_size = inputs["input_ids"].shape[0]
            input_ids = inputs["input_ids"]
            attention_mask = inputs["attention_mask"]
            token_type_ids = inputs["token_type_ids"].numpy()
            token_type_ids_example = None

            for index in range(batch_size):
                # Same sequential bookkeeping as the PyTorch branch, using NumPy / TensorFlow tensors.
                if prev_answers is not None:
                    prev_labels_example = token_type_ids_example[:, 3]  # shape (seq_len,)
                    model_labels = np.zeros_like(prev_labels_example, dtype=np.int32)  # shape (seq_len,)

                    token_type_ids_example = token_type_ids[index]  # shape (seq_len, 7)
                    for i in range(model_labels.shape[0]):
                        segment_id = token_type_ids_example[:, 0].tolist()[i]
                        col_id = token_type_ids_example[:, 1].tolist()[i] - 1
                        row_id = token_type_ids_example[:, 2].tolist()[i] - 1

                        if row_id >= 0 and col_id >= 0 and segment_id == 1:
                            model_labels[i] = int(prev_answers[(col_id, row_id)])

                    token_type_ids_example[:, 3] = model_labels

                input_ids_example = input_ids[index]
                attention_mask_example = attention_mask[index]  # shape (seq_len,)
                token_type_ids_example = token_type_ids[index]  # shape (seq_len, 7)
                outputs = self.model(
                    input_ids=np.expand_dims(input_ids_example, axis=0),
                    attention_mask=np.expand_dims(attention_mask_example, axis=0),
                    token_type_ids=np.expand_dims(token_type_ids_example, axis=0),
                )
                logits = outputs.logits

                if self.aggregate:
                    all_aggregations.append(outputs.logits_aggregation)

                all_logits.append(logits)

                dist_per_token = tfp.distributions.Bernoulli(logits=logits)
                probabilities = dist_per_token.probs_parameter() * tf.cast(attention_mask_example, tf.float32)

                coords_to_probs = collections.defaultdict(list)
                for i, p in enumerate(tf.squeeze(probabilities).numpy().tolist()):
                    segment_id = token_type_ids_example[:, 0].tolist()[i]
                    col = token_type_ids_example[:, 1].tolist()[i] - 1
                    row = token_type_ids_example[:, 2].tolist()[i] - 1
                    if col >= 0 and row >= 0 and segment_id == 1:
                        coords_to_probs[(col, row)].append(p)

                prev_answers = {key: np.array(coords_to_probs[key]).mean() > 0.5 for key in coords_to_probs}

            logits_batch = tf.concat(tuple(all_logits), 0)

            return (logits_batch,) if not self.aggregate else (logits_batch, tf.concat(tuple(all_aggregations), 0))

    def __call__(self, *args, **kwargs):
        """
        Answers queries according to a table. The pipeline accepts several types of inputs which are detailed below:

        - `pipeline(table, query)`
        - `pipeline(table, [query])`
        - `pipeline(table=table, query=query)`
        - `pipeline(table=table, query=[query])`
        - `pipeline({"table": table, "query": query})`
        - `pipeline({"table": table, "query": [query]})`
        - `pipeline([{"table": table, "query": query}, {"table": table, "query": query}])`

        The `table` argument should be a dict or a DataFrame built from that dict, containing the whole table:

        Example:

        ```python
        data = {
            "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
            "age": ["56", "45", "59"],
            "number of movies": ["87", "53", "69"],
            "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
        }
        ```

        This dictionary can be passed in as such, or can be converted to a pandas DataFrame:

        Example:

        ```python
        import pandas as pd

        table = pd.DataFrame.from_dict(data)
        ```
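
        A query can also be a list of strings, and several `{"table": ..., "query": ...}` pairs can be passed as a
        list (see the call formats above). Illustrative sketch, reusing `data` from above with a hypothetical
        pipeline instance named `tqa_pipeline`:

        ```python
        # One table, several queries: returns one result dictionary per query.
        results = tqa_pipeline(table=data, query=["how many movies does brad pitt have?", "how old is he?"])
        ```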

        Args:
            table (`pd.DataFrame` or `Dict`):
                Pandas DataFrame or dictionary that will be converted to a DataFrame containing all the table values.
                See above for an example of dictionary.
            query (`str` or `List[str]`):
                Query or list of queries that will be sent to the model alongside the table.
            sequential (`bool`, *optional*, defaults to `False`):
                Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the
                inference to be done sequentially to extract relations within sequences, given their conversational
                nature.
            padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
                Activates and controls padding. Accepts the following values:

                - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
                  sequence is provided).
                - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
                  acceptable input length for the model if that argument is not provided.
                - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
                  lengths).

            truncation (`bool`, `str` or [`TapasTruncationStrategy`], *optional*, defaults to `False`):
                Activates and controls truncation. Accepts the following values:

                - `True` or `'drop_rows_to_fit'`: Truncate to a maximum length specified with the argument `max_length`
                  or to the maximum acceptable input length for the model if that argument is not provided. This will
                  truncate row by row, removing rows from the table.
                - `False` or `'do_not_truncate'` (default): No truncation (i.e., can output a batch with sequence lengths
                  greater than the model maximum admissible input size).


        Return:
            A dictionary or a list of dictionaries containing results: Each result is a dictionary with the following
            keys:

            - **answer** (`str`) -- The answer of the query given the table. If there is an aggregator, the answer will
              be preceded by `AGGREGATOR >`.
            - **coordinates** (`List[Tuple[int, int]]`) -- Coordinates of the cells of the answers.
            - **cells** (`List[str]`) -- List of strings made up of the answer cell values.
            - **aggregator** (`str`) -- If the model has an aggregator, this returns the aggregator.
        """
        pipeline_inputs = self._args_parser(*args, **kwargs)

        results = super().__call__(pipeline_inputs, **kwargs)
        if len(results) == 1:
            return results[0]
        return results

    def _sanitize_parameters(self, sequential=None, padding=None, truncation=None, **kwargs):
        preprocess_params = {}
        if padding is not None:
            preprocess_params["padding"] = padding
        if truncation is not None:
            preprocess_params["truncation"] = truncation

        forward_params = {}
        if sequential is not None:
            forward_params["sequential"] = sequential
        return preprocess_params, forward_params, {}

    def preprocess(self, pipeline_input, sequential=None, padding=True, truncation=None):
        if truncation is None:
            if self.type == "tapas":
                truncation = "drop_rows_to_fit"
            else:
                truncation = "do_not_truncate"

        table, query = pipeline_input["table"], pipeline_input["query"]
        if table.empty:
            raise ValueError("table is empty")
        if query is None or query == "":
            raise ValueError("query is empty")
        inputs = self.tokenizer(table, query, return_tensors=self.framework, truncation=truncation, padding=padding)
        inputs["table"] = table
        return inputs

    def _forward(self, model_inputs, sequential=False):
        table = model_inputs.pop("table")

        if self.type == "tapas":
            if sequential:
                outputs = self.sequential_inference(**model_inputs)
            else:
                outputs = self.batch_inference(**model_inputs)
        else:
            outputs = self.model.generate(**model_inputs)
        model_outputs = {"model_inputs": model_inputs, "table": table, "outputs": outputs}
        return model_outputs

    def postprocess(self, model_outputs):
        inputs = model_outputs["model_inputs"]
        table = model_outputs["table"]
        outputs = model_outputs["outputs"]
        if self.type == "tapas":
            if self.aggregate:
                logits, logits_agg = outputs[:2]
                predictions = self.tokenizer.convert_logits_to_predictions(inputs, logits, logits_agg)
                answer_coordinates_batch, agg_predictions = predictions
                aggregators = {i: self.model.config.aggregation_labels[pred] for i, pred in enumerate(agg_predictions)}

                no_agg_label_index = self.model.config.no_aggregation_label_index
                aggregators_prefix = {
                    i: aggregators[i] + " > " for i, pred in enumerate(agg_predictions) if pred != no_agg_label_index
                }
            else:
                logits = outputs[0]
                predictions = self.tokenizer.convert_logits_to_predictions(inputs, logits)
                answer_coordinates_batch = predictions[0]
                aggregators = {}
                aggregators_prefix = {}

            answers = []
            for index, coordinates in enumerate(answer_coordinates_batch):
                cells = [table.iat[coordinate] for coordinate in coordinates]
                aggregator = aggregators.get(index, "")
                aggregator_prefix = aggregators_prefix.get(index, "")
                answer = {
                    "answer": aggregator_prefix + ", ".join(cells),
                    "coordinates": coordinates,
                    "cells": cells,
                }
                if aggregator:
                    answer["aggregator"] = aggregator

                answers.append(answer)
            if len(answers) == 0:
                raise PipelineException("Empty answer")
        else:
            answers = [{"answer": answer} for answer in self.tokenizer.batch_decode(outputs, skip_special_tokens=True)]

        return answers if len(answers) > 1 else answers[0]
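

# Minimal usage sketch, guarded so it never runs on import. It assumes the public
# `transformers.pipeline` factory and the "google/tapas-base-finetuned-wtq" checkpoint
# referenced in the class docstring above; any TAPAS checkpoint fine-tuned for table QA
# should behave the same way.
if __name__ == "__main__":
    from transformers import pipeline

    tqa = pipeline("table-question-answering", model="google/tapas-base-finetuned-wtq")
    table = {
        "Repository": ["Transformers", "Datasets", "Tokenizers"],
        "Stars": ["36542", "4512", "3934"],
    }
    # Batched form handled by TableQuestionAnsweringArgumentHandler: a list of
    # {"table": ..., "query": ...} dicts, each table converted to a pandas DataFrame.
    print(tqa([{"table": table, "query": "How many stars does the transformers repository have?"}]))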