"""Tensorflow trainer class."""

import datetime
import math
import os
import warnings
from typing import Callable, Dict, Optional, Tuple

from .utils import ENV_VARS_TRUE_VALUES


# Integrations must be imported before ML frameworks:
from .integrations import is_comet_available, is_wandb_available

import numpy as np
import tensorflow as tf
from tensorflow.python.distribute.values import PerReplica

from .modeling_tf_utils import TFPreTrainedModel
from .optimization_tf import GradientAccumulator, create_optimizer
from .trainer_utils import (
    PREFIX_CHECKPOINT_DIR,
    EvalPrediction,
    IntervalStrategy,
    PredictionOutput,
    enable_full_determinism,
    set_seed,
)
from .training_args_tf import TFTrainingArguments
from .utils import logging


if is_wandb_available():
    import wandb

if is_comet_available():
    import comet_ml


logger = logging.get_logger(__name__)


class TFTrainer:
    """
    TFTrainer is a simple but feature-complete training and eval loop for TensorFlow, optimized for 🤗 Transformers.

    Args:
        model ([`TFPreTrainedModel`]):
            The model to train, evaluate or use for predictions.
        args ([`TFTrainingArguments`]):
            The arguments to tweak training.
        train_dataset ([`~tf.data.Dataset`], *optional*):
            The dataset to use for training. The dataset should yield tuples of `(features, labels)` where `features`
            is a dict of input features and `labels` is the labels. If `labels` is a tensor, the loss is calculated by
            the model by calling `model(features, labels=labels)`. If `labels` is a dict, such as when using a
            QuestionAnswering head model with multiple targets, the loss is instead calculated by calling
            `model(features, **labels)`.
        eval_dataset ([`~tf.data.Dataset`], *optional*):
            The dataset to use for evaluation. The dataset should yield tuples of `(features, labels)` where `features`
            is a dict of input features and `labels` is the labels. If `labels` is a tensor, the loss is calculated by
            the model by calling `model(features, labels=labels)`. If `labels` is a dict, such as when using a
            QuestionAnswering head model with multiple targets, the loss is instead calculated by calling
            `model(features, **labels)`.
        compute_metrics (`Callable[[EvalPrediction], Dict]`, *optional*):
            The function that will be used to compute metrics at evaluation. Must take a [`EvalPrediction`] and return
            a dictionary string to metric values.
        tb_writer (`tf.summary.SummaryWriter`, *optional*):
            Object to write to TensorBoard.
        optimizers (`Tuple[tf.keras.optimizers.Optimizer, tf.keras.optimizers.schedules.LearningRateSchedule]`, *optional*):
            A tuple containing the optimizer and the scheduler to use. The optimizer defaults to an instance of
            [`tf.keras.optimizers.Adam`] if `args.weight_decay_rate` is 0 else an instance of [`AdamWeightDecay`]. The
            scheduler will default to an instance of [`tf.keras.optimizers.schedules.PolynomialDecay`] if
            `args.num_warmup_steps` is 0 else an instance of [`WarmUp`].
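
    Example (a minimal usage sketch, not taken from this file; the model checkpoint, toy data, and argument
    values below are illustrative assumptions):

    ```python
    import numpy as np
    import tensorflow as tf

    from transformers import TFAutoModelForSequenceClassification, TFTrainer, TFTrainingArguments

    model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-uncased")

    # A `tf.data.Dataset` of `(features, labels)` tuples; `from_tensor_slices` has a
    # known cardinality, which `get_train_tfdataset` requires.
    features = {
        "input_ids": np.random.randint(0, 100, size=(8, 16), dtype=np.int32),
        "attention_mask": np.ones((8, 16), dtype=np.int32),
    }
    labels = np.zeros(8, dtype=np.int32)
    train_dataset = tf.data.Dataset.from_tensor_slices((features, labels))

    args = TFTrainingArguments(output_dir="./out", num_train_epochs=1)
    trainer = TFTrainer(model=model, args=args, train_dataset=train_dataset)
    trainer.train()
    ```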
    NNN)modelargstrain_dataseteval_datasetcompute_metrics	tb_writer
optimizersc                 C   s   || _ || _|| _|| _|| _|\| _| _t | _d| _	d| _
tjj | _tdt |d k	rh|| _ntj| jj| _t r|   ntdd tkrtd t r|    ntj!"ddkrtd | jj#rt$| jj%n
t&| jj% d S )	Nr   a[  The class `TFTrainer` is deprecated and will be removed in version 5 of Transformers. We recommend using native Keras instead, by calling methods like `fit()` and `predict()` directly on the model object. Detailed examples of the Keras style can be found in our examples at https://github.com/huggingface/transformers/tree/main/examples/tensorflowZWANDB_DISABLED zYou are instantiating a Trainer but W&B is not installed. To use wandb logging, run `pip install wandb && wandb login` see https://docs.wandb.com/huggingface.
COMET_MODEZDISABLEDzoTo use comet_ml logging, run `pip/conda install comet_ml` see https://www.comet.ml/docs/python-sdk/huggingface/)'r   r   r   r   r   	optimizerlr_schedulerr   gradient_accumulatorglobal_stepepoch_loggingtfkerasmetricsSum	eval_losswarningswarnFutureWarningr   summaryZcreate_file_writerlogging_dirr	   setup_wandbosgetenvupperr   loggerinfor   setup_cometenvirongetZfull_determinismr   seedr   )selfr   r   r   r   r   r   r    r;   V/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/transformers/trainer_tf.py__init__^   s<    

zTFTrainer.__init__)returnc                 C   s   | j dkrtd| jj| jj | _| j   | _| jdk rFtd| j 	 j
| j| jjdj| j| jjdtjjj}| jj|S )z
        Returns the training [`~tf.data.Dataset`].

        Subclass and override this method if you want to inject some custom behavior.
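
        Example (an illustrative sketch of preparing a dataset this method can consume; the generator and
        its size are hypothetical):

        ```python
        import tensorflow as tf

        def gen():
            for i in range(100):
                yield {"input_ids": [i, i + 1]}, 0

        ds = tf.data.Dataset.from_generator(
            gen,
            output_signature=(
                {"input_ids": tf.TensorSpec(shape=(2,), dtype=tf.int32)},
                tf.TensorSpec(shape=(), dtype=tf.int32),
            ),
        )
        # Generator-backed datasets report unknown cardinality; assert it so that
        # `cardinality().numpy()` below returns 100 instead of a negative sentinel.
        ds = ds.apply(tf.data.experimental.assert_cardinality(100))
        ```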
        """
        if self.train_dataset is None:
            raise ValueError("Trainer: training requires a train_dataset.")

        self.total_train_batch_size = self.args.train_batch_size * self.args.gradient_accumulation_steps
        self.num_train_examples = self.train_dataset.cardinality().numpy()

        if self.num_train_examples < 0:
            raise ValueError("The training dataset must have an asserted cardinality")

        ds = (
            self.train_dataset.repeat()
            .shuffle(self.num_train_examples, seed=self.args.seed)
            .batch(self.total_train_batch_size, drop_remainder=self.args.dataloader_drop_last)
            .prefetch(tf.data.experimental.AUTOTUNE)
        )

        return self.args.strategy.experimental_distribute_dataset(ds)

    def get_eval_tfdataset(self, eval_dataset: Optional[tf.data.Dataset] = None) -> tf.data.Dataset:
        """
        Returns the evaluation [`~tf.data.Dataset`].

        Args:
            eval_dataset ([`~tf.data.Dataset`], *optional*):
                If provided, will override *self.eval_dataset*. The dataset should yield tuples of `(features, labels)`
                where `features` is a dict of input features and `labels` is the labels. If `labels` is a tensor, the
                loss is calculated by the model by calling `model(features, labels=labels)`. If `labels` is a dict,
                such as when using a QuestionAnswering head model with multiple targets, the loss is instead calculated
                by calling `model(features, **labels)`.

        Subclass and override this method if you want to inject some custom behavior.
        """
        if eval_dataset is None and self.eval_dataset is None:
            raise ValueError("Trainer: evaluation requires an eval_dataset.")

        eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
        num_examples = eval_dataset.cardinality().numpy()

        if num_examples < 0:
            raise ValueError("The training dataset must have an asserted cardinality")

        approx = math.floor if self.args.dataloader_drop_last else math.ceil
        steps = approx(num_examples / self.args.eval_batch_size)
        ds = (
            eval_dataset.repeat()
            .batch(self.args.eval_batch_size, drop_remainder=self.args.dataloader_drop_last)
            .prefetch(tf.data.experimental.AUTOTUNE)
        )

        return self.args.strategy.experimental_distribute_dataset(ds), steps, num_examples

    def get_test_tfdataset(self, test_dataset: tf.data.Dataset) -> tf.data.Dataset:
        """
        Returns a test [`~tf.data.Dataset`].

        Args:
            test_dataset ([`~tf.data.Dataset`]):
                The dataset to use. The dataset should yield tuples of `(features, labels)` where `features` is a dict
                of input features and `labels` is the labels. If `labels` is a tensor, the loss is calculated by the
                model by calling `model(features, labels=labels)`. If `labels` is a dict, such as when using a
                QuestionAnswering head model with multiple targets, the loss is instead calculated by calling
                `model(features, **labels)`.

        Subclass and override this method if you want to inject some custom behavior.
        """
        num_examples = test_dataset.cardinality().numpy()

        if num_examples < 0:
            raise ValueError("The training dataset must have an asserted cardinality")

        steps = math.ceil(num_examples / self.args.eval_batch_size)
        ds = test_dataset.batch(self.args.eval_batch_size).prefetch(tf.data.experimental.AUTOTUNE)

        return self.args.strategy.experimental_distribute_dataset(ds), steps, num_examples

    def create_optimizer_and_scheduler(self, num_training_steps: int):
        """
        Setup the optimizer and the learning rate scheduler.

        We provide a reasonable default that works well. If you want to use something else, you can pass a tuple in the
        TFTrainer's init through `optimizers`, or subclass and override this method.
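
        Example (a hedged sketch of the override path via the init; the schedule and optimizer choices are
        arbitrary, and `model`/`args` are assumed to exist):

        ```python
        import tensorflow as tf

        schedule = tf.keras.optimizers.schedules.PolynomialDecay(5e-5, decay_steps=10000, end_learning_rate=0.0)
        optimizer = tf.keras.optimizers.Adam(learning_rate=schedule)
        trainer = TFTrainer(model=model, args=args, optimizers=(optimizer, schedule))
        ```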
        """
        if not self.optimizer and not self.lr_scheduler:
            warmup_steps = (
                self.args.warmup_steps
                if self.args.warmup_steps > 0
                else math.ceil(num_training_steps * self.args.warmup_ratio)
            )

            self.optimizer, self.lr_scheduler = create_optimizer(
                self.args.learning_rate,
                num_training_steps,
                warmup_steps,
                adam_beta1=self.args.adam_beta1,
                adam_beta2=self.args.adam_beta2,
                adam_epsilon=self.args.adam_epsilon,
                weight_decay_rate=self.args.weight_decay,
                power=self.args.poly_power,
            )

    def setup_wandb(self):
        """
        Setup the optional Weights & Biases (`wandb`) integration.

        One can subclass and override this method to customize the setup if needed. Find more information `here
        <https://docs.wandb.com/huggingface>`__. You can also override the following environment variables:

        Environment:
            WANDB_PROJECT:
                (Optional): str - "huggingface" by default, set this to a custom string to store results in a different
                project.
            WANDB_DISABLED:
                (Optional): boolean - defaults to false, set to "true" to disable wandb entirely.
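
        Example (illustrative values; set these before the trainer is constructed):

        ```python
        import os

        os.environ["WANDB_PROJECT"] = "my-project"  # hypothetical project name
        # os.environ["WANDB_DISABLED"] = "true"  # uncomment to disable wandb entirely
        ```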
        """
        logger.info('Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"')
        combined_dict = {**self.model.config.to_dict(), **self.args.to_sanitized_dict()}
        wandb.init(project=os.getenv("WANDB_PROJECT", "huggingface"), config=combined_dict, name=self.args.run_name)

    def setup_comet(self):
        """
        Setup the optional Comet.ml integration.

        Environment:
            COMET_MODE:
                (Optional): str - "OFFLINE", "ONLINE", or "DISABLED"
            COMET_PROJECT_NAME:
                (Optional): str - Comet.ml project name for experiments
            COMET_OFFLINE_DIRECTORY:
                (Optional): str - folder to use for saving offline experiments when `COMET_MODE` is "OFFLINE"

        For a number of configurable items in the environment, see `here
        <https://www.comet.ml/docs/python-sdk/advanced/#comet-configuration-variables>`__
        """
        comet_mode = os.getenv("COMET_MODE", "ONLINE").upper()
        args = {"project_name": os.getenv("COMET_PROJECT_NAME", "huggingface")}
        experiment = None
        if comet_mode == "ONLINE":
            experiment = comet_ml.Experiment(**args)
            logger.info("Automatic Comet.ml online logging enabled")
        elif comet_mode == "OFFLINE":
            args["offline_directory"] = os.getenv("COMET_OFFLINE_DIRECTORY", "./")
            experiment = comet_ml.OfflineExperiment(**args)
            logger.info("Automatic Comet.ml offline logging enabled; use `comet upload` when finished")
        if experiment is not None:
            experiment._set_model_graph(self.model, framework="transformers")
            experiment._log_parameters(self.args, prefix="args/", framework="transformers")
            experiment._log_parameters(self.model.config, prefix="config/", framework="transformers")

    def prediction_loop(
        self,
        dataset: tf.data.Dataset,
        steps: int,
        num_examples: int,
        description: str,
        prediction_loss_only: Optional[bool] = None,
    ) -> PredictionOutput:
        """
        Prediction/evaluation loop, shared by [`~TFTrainer.evaluate`] and [`~TFTrainer.predict`].

        Works both with or without labels.
        Nz***** Running z *****z  Num examples in dataset = 
Evaluationz'  Num examples in used in evaluation = z  Batch size = r   r   Zaxis)predictions	label_idsr*   Zeval__past)ry   rz   r(   )r   rv   r4   r5   rW   r*   reset_states
past_indexr{   	enumeratedistributed_prediction_steps
isinstancetuple
n_replicasvaluesrF   npappendr   r   resultlistkeys
startswithpophasattrdelattrr   )r:   rt   rZ   rX   ru   rv   rz   predssteprJ   logits_labelsvalr(   keyr;   r;   r<   prediction_loop1  s^    













 
zTFTrainer.prediction_loop)logsr>   c              	   C   s   | j |d< | jrV| j , | D ]\}}tjj||| jd q$W 5 Q R X | j  t	 rlt
j|| jd t rtj }|dk	r|j|| j| j dd |d| ji}t| dS )z
        Log `logs` on the various objects watching training.

        Subclass and override this method to inject custom behavior.

        Args:
            logs (`Dict[str, float]`):
                The values to log.
        """
        logs["epoch"] = self.epoch_logging

        if self.tb_writer:
            with self.tb_writer.as_default():
                for k, v in logs.items():
                    tf.summary.scalar(k, v, step=self.global_step)
            self.tb_writer.flush()

        if is_wandb_available():
            wandb.log(logs, step=self.global_step)

        if is_comet_available():
            experiment = comet_ml.config.get_global_experiment()
            if experiment is not None:
                experiment._log_metrics(
                    logs, step=self.global_step, epoch=self.epoch_logging, framework="transformers"
                )

        output = {**logs, **{"step": self.global_step}}

        logger.info(output)

    def evaluate(self, eval_dataset: Optional[tf.data.Dataset] = None) -> Dict[str, float]:
        """
        Run evaluation and returns metrics.

        The calling script will be responsible for providing a method to compute metrics, as they are task-dependent
        (pass it to the init `compute_metrics` argument).

        Args:
            eval_dataset ([`~tf.data.Dataset`], *optional*):
                Pass a dataset if you wish to override `self.eval_dataset`. The dataset should yield tuples of
                `(features, labels)` where `features` is a dict of input features and `labels` is the labels. If
                `labels` is a tensor, the loss is calculated by the model by calling `model(features, labels=labels)`.
                If `labels` is a dict, such as when using a QuestionAnswering head model with multiple targets, the
                loss is instead calculated by calling `model(features, **labels)`.

        Returns:
            A dictionary containing the evaluation loss and the potential metrics computed from the predictions.
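
        Example (a minimal sketch; assumes `trainer` was built with an `eval_dataset`):

        ```python
        metrics = trainer.evaluate()
        print(metrics["eval_loss"])
        ```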
        """
        eval_ds, steps, num_examples = self.get_eval_tfdataset(eval_dataset)

        output = self.prediction_loop(eval_ds, steps, num_examples, description="Evaluation")
        logs = {**output.metrics}
        logs["epoch"] = self.epoch_logging

        self.log(logs)

        return output.metrics

    def prediction_step(
        self, features: tf.Tensor, labels: tf.Tensor, nb_instances_in_global_batch: tf.Tensor
    ) -> tf.Tensor:
        """
        Compute the prediction on features and update the loss with labels.

        Subclass and override to inject some custom behavior.
        """
        per_example_loss, logits = self.run_model(features, labels, False)
        scaled_loss = per_example_loss / tf.cast(nb_instances_in_global_batch, dtype=per_example_loss.dtype)

        self.eval_loss.update_state(scaled_loss)

        return logits

    @tf.function
    def distributed_prediction_steps(self, batch):
        nb_instances_in_batch = self._compute_nb_instances(batch)
        inputs = self._get_step_inputs(batch, nb_instances_in_batch)

        logits = self.args.strategy.run(self.prediction_step, inputs)

        return logits

    def train(self) -> None:
        """
        Train method to train the model.
        """
        train_ds = self.get_train_tfdataset()

        if self.args.debug:
            tf.summary.trace_on(graph=True, profiler=True)

        self.gradient_accumulator.reset()

        num_update_steps_per_epoch = self.num_train_examples / self.total_train_batch_size

        # ``self.args.dataloader_drop_last`` has no effect on the batching itself here,
        # because the dataset is repeated before being batched; it only changes the rounding below.
        approx = math.floor if self.args.dataloader_drop_last else math.ceil
        num_update_steps_per_epoch = approx(num_update_steps_per_epoch)

        # At least one update for each epoch.
        num_update_steps_per_epoch = max(num_update_steps_per_epoch, 1)
        self.steps_per_epoch = num_update_steps_per_epoch

        if self.args.max_steps > 0:
            t_total = self.args.max_steps
            epochs = (self.args.max_steps // self.steps_per_epoch) + int(
                self.args.max_steps % self.steps_per_epoch > 0
            )
        else:
            t_total = self.steps_per_epoch * self.args.num_train_epochs
            epochs = self.args.num_train_epochs

        # Since ``self.args.num_train_epochs`` can be `float`, make `epochs` a `float` always.
        epochs = float(epochs)

        with self.args.strategy.scope():
            self.create_optimizer_and_scheduler(num_training_steps=t_total)
            folder = os.path.join(self.args.output_dir, PREFIX_CHECKPOINT_DIR)
            ckpt = tf.train.Checkpoint(optimizer=self.optimizer, model=self.model)
            self.model.ckpt_manager = tf.train.CheckpointManager(ckpt, folder, max_to_keep=self.args.save_total_limit)

            iterations = self.optimizer.iterations
            epochs_trained = 0
            steps_trained_in_current_epoch = 0
            if self.model.ckpt_manager.latest_checkpoint:
                logger.info(
                    f"Checkpoint file {self.model.ckpt_manager.latest_checkpoint} found and restoring from checkpoint"
                )
                ckpt.restore(self.model.ckpt_manager.latest_checkpoint).expect_partial()

                self.global_step = iterations.numpy()

                epochs_trained = self.global_step // self.steps_per_epoch
                steps_trained_in_current_epoch = self.global_step % self.steps_per_epoch

                logger.info("  Continuing training from checkpoint, will skip to saved global_step")
                logger.info(f"  Continuing training from epoch {epochs_trained}")
                logger.info(f"  Continuing training from global step {self.global_step}")
                logger.info(f"  Will skip the first {steps_trained_in_current_epoch} steps in the first epoch")

            tf.summary.experimental.set_step(self.global_step)

            with self.tb_writer.as_default():
                tf.summary.text("args", self.args.to_json_string())

            self.tb_writer.flush()

            logger.info("***** Running training *****")
            logger.info(f"  Num examples = {self.num_train_examples}")
            logger.info(f"  Num Epochs = {epochs}")
            logger.info(f"  Instantaneous batch size per device = {self.args.per_device_train_batch_size}")
            logger.info(
                f"  Total train batch size (w. parallel, distributed & accumulation) = {self.total_train_batch_size}"
            )
            logger.info(f"  Gradient Accumulation steps = {self.args.gradient_accumulation_steps}")
            logger.info(f"  Steps per epoch = {self.steps_per_epoch}")
            logger.info(f"  Total optimization steps = {t_total}")

            self.train_loss = tf.keras.metrics.Sum()
            start_time = datetime.datetime.now()

            for epoch_iter in range(epochs_trained, int(epochs)):
                # Reset the past mems state at the beginning of each epoch if necessary.
                if self.args.past_index >= 0:
                    self._past = None

                for step, batch in enumerate(train_ds):
                    # Skip past any already trained steps if resuming training.
                    if steps_trained_in_current_epoch > 0:
                        steps_trained_in_current_epoch -= 1
                        continue

                    self.distributed_training_steps(batch)

                    self.global_step = iterations.numpy()
                    self.epoch_logging = epoch_iter + (step + 1) / self.steps_per_epoch

                    training_loss = self.train_loss.result() / (step + 1)

                    if self.args.debug:
                        logs = {}
                        logs["loss"] = training_loss.numpy()
                        logs["epoch"] = self.epoch_logging

                        self.log(logs)

                    if self.global_step == 1 and self.args.debug:
                        with self.tb_writer.as_default():
                            tf.summary.trace_export(
                                name="training", step=self.global_step, profiler_outdir=self.args.logging_dir
                            )

                    if (
                        self.args.eval_steps > 0
                        and self.args.evaluation_strategy == IntervalStrategy.STEPS
                        and self.global_step % self.args.eval_steps == 0
                    ):
                        self.evaluate()

                    if (self.args.logging_steps > 0 and self.global_step % self.args.logging_steps == 0) or (
                        self.global_step == 1 and self.args.logging_first_step
                    ):
                        logs = {}
                        logs["loss"] = training_loss.numpy()
                        logs["learning_rate"] = self.lr_scheduler(self.global_step).numpy()
                        logs["epoch"] = self.epoch_logging

                        self.log(logs)

                    if self.args.save_steps > 0 and self.global_step % self.args.save_steps == 0:
                        ckpt_save_path = self.model.ckpt_manager.save()

                        logger.info(f"Saving checkpoint for step {self.global_step} at {ckpt_save_path}")

                    if self.args.max_steps > 0 and self.global_step >= t_total:
                        break

                    if self.global_step % self.steps_per_epoch == 0:
                        break

                self.train_loss.reset_states()

                if self.args.max_steps > 0 and self.global_step >= self.args.max_steps:
                    break

            end_time = datetime.datetime.now()

            logger.info(f"Training took: {str(end_time - start_time)}")

        if self.args.past_index and hasattr(self, "_past"):
            # Clean the state at the end of training.
            delattr(self, "_past")

    def training_step(self, features, labels, nb_instances_in_global_batch):
        """
        Perform a training step on features and labels.

        Subclass and override to inject some custom behavior.
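
        Example (a hypothetical override, shown only to illustrate the hook):

        ```python
        class MyTFTrainer(TFTrainer):
            def training_step(self, features, labels, nb_instances_in_global_batch):
                # Inspect the effective global batch before delegating to the default step.
                tf.print("instances in global batch:", nb_instances_in_global_batch)
                return super().training_step(features, labels, nb_instances_in_global_batch)
        ```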
        """
        per_example_loss, _ = self.run_model(features, labels, True)
        scaled_loss = per_example_loss / tf.cast(nb_instances_in_global_batch, dtype=per_example_loss.dtype)
        gradients = tf.gradients(scaled_loss, self.model.trainable_variables)
        gradients = [
            g if g is not None else tf.zeros_like(v) for g, v in zip(gradients, self.model.trainable_variables)
        ]

        if self.args.gradient_accumulation_steps > 1:
            self.gradient_accumulator(gradients)

        self.train_loss.update_state(scaled_loss)

        if self.args.gradient_accumulation_steps == 1:
            return gradients

    def apply_gradients(self, features, labels, nb_instances_in_global_batch):
        if self.args.gradient_accumulation_steps == 1:
            gradients = self.training_step(features, labels, nb_instances_in_global_batch)

            self.optimizer.apply_gradients(list(zip(gradients, self.model.trainable_variables)))
        else:
            for _ in tf.range(self.args.gradient_accumulation_steps):
                reduced_features = {
                    k: ft[: self.args.train_batch_size // self.args.n_replicas] for k, ft in features.items()
                }

                if tf.is_tensor(labels):
                    reduced_labels = labels[: self.args.train_batch_size // self.args.n_replicas]
                elif isinstance(labels, dict):
                    reduced_labels = {
                        k: lbl[: self.args.train_batch_size // self.args.n_replicas] for k, lbl in labels.items()
                    }
                else:
                    raise ValueError("The labels must be either a tf.Tensor or a dict.")

                self.training_step(reduced_features, reduced_labels, nb_instances_in_global_batch)

                features = {
                    k: tf.concat(
                        [ft[self.args.train_batch_size // self.args.n_replicas :], reduced_features[k]],
                        axis=0,
                    )
                    for k, ft in features.items()
                }

                if tf.is_tensor(labels):
                    labels = tf.concat(
                        [labels[self.args.train_batch_size // self.args.n_replicas :], reduced_labels], axis=0
                    )
                elif isinstance(labels, dict):
                    labels = {
                        k: tf.concat(
                            [lbl[self.args.train_batch_size // self.args.n_replicas :], reduced_labels[k]], axis=0
                        )
                        for k, lbl in labels.items()
                    }
                else:
                    raise ValueError("The labels must be either a tf.Tensor or a dict.")

            gradients = self.gradient_accumulator.gradients
            gradients = [
                (tf.clip_by_value(grad, -self.args.max_grad_norm, self.args.max_grad_norm)) for grad in gradients
            ]

            self.optimizer.apply_gradients(list(zip(gradients, self.model.trainable_variables)))
            self.gradient_accumulator.reset()

    @tf.function
    def distributed_training_steps(self, batch):
        with self.args.strategy.scope():
            nb_instances_in_batch = self._compute_nb_instances(batch)
            inputs = self._get_step_inputs(batch, nb_instances_in_batch)

            self.args.strategy.run(self.apply_gradients, inputs)

    @staticmethod
    def _compute_nb_instances(batch):
        labels = batch[-1]
        if isinstance(labels, PerReplica):
            labels = tf.concat(labels.values, axis=0)

        nb_instances = tf.reduce_sum(tf.cast(labels != -100, dtype=tf.int32))

        return nb_instances

    @staticmethod
    def _get_step_inputs(batch, nb_instances):
        features, labels = batch

        if isinstance(labels, PerReplica):
            # Need to make a `PerReplica` object for ``nb_instances``.
            nb_instances = PerReplica([nb_instances] * len(labels.values))

        step_inputs = (features, labels, nb_instances)

        return step_inputs

    def run_model(self, features, labels, training):
        """
        Computes the loss of the given features and labels pair.

        Subclass and override this method if you want to inject some custom behavior.

        Args:
            features (`tf.Tensor`): A batch of input features.
            labels (`tf.Tensor`): A batch of labels.
            training (`bool`): Whether or not to run the model in training mode.

        Returns:
            A tuple of two `tf.Tensor`: The loss and logits.
        """
        if self.args.past_index >= 0 and getattr(self, "_past", None) is not None:
            features["mems"] = self._past

        if isinstance(labels, dict):
            outputs = self.model(features, training=training, **labels)[:2]
        else:
            outputs = self.model(features, labels=labels, training=training)[:2]

        loss, logits = outputs[:2]

        if self.args.past_index >= 0:
            self._past = outputs[self.args.past_index]

        return loss, logits

    def predict(self, test_dataset: tf.data.Dataset) -> PredictionOutput:
        """
        Run prediction and returns predictions and potential metrics.

        Depending on the dataset and your use case, your test dataset may contain labels. In that case, this method
        will also return metrics, like in `evaluate()`.

        Args:
            test_dataset ([`~tf.data.Dataset`]):
                Dataset to run the predictions on. The dataset should yield tuples of `(features, labels)` where
                `features` is a dict of input features and `labels` is the labels. If `labels` is a tensor, the loss is
                calculated by the model by calling `model(features, labels=labels)`. If `labels` is a dict, such as
                when using a QuestionAnswering head model with multiple targets, the loss is instead calculated by
                calling `model(features, **labels)`

        Returns: *NamedTuple* A namedtuple with the following keys:

            - predictions (`np.ndarray`): The predictions on `test_dataset`.
            - label_ids (`np.ndarray`, *optional*): The labels (if the dataset contained some).
            - metrics (`Dict[str, float]`, *optional*): The potential dictionary of metrics (if the dataset contained
              labels).
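
        Example (a brief sketch; `test_dataset` is assumed to be a `tf.data.Dataset` with an asserted
        cardinality, as described above):

        ```python
        output = trainer.predict(test_dataset)
        print(output.predictions.shape)
        ```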
        """
        test_ds, steps, num_examples = self.get_test_tfdataset(test_dataset)

        return self.prediction_loop(test_ds, steps, num_examples, description="Prediction")

    def save_model(self, output_dir: Optional[str] = None):
        """
        Will save the model, so you can reload it using `from_pretrained()`.
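
        Example (illustrative; `"./out"` is an assumed path and `model` the trained model):

        ```python
        trainer.save_model("./out")
        reloaded = model.__class__.from_pretrained("./out")
        ```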
        NzSaving model in z1Trainer.model appears to not be a PreTrainedModel)	r   r   r4   r5   r   r   r   rA   Zsave_pretrained)r:   r   r;   r;   r<   
save_model  s
    zTFTrainer.save_model)NNNNr   )N)N)N)N)2__name__
__module____qualname____doc__r   r   r   r&   rM   ZDatasetr   r   r   r.   ZSummaryWriterr   r'   r   Z	OptimizerZ	schedulesZLearningRateScheduler=   rS   r[   r]   r   rf   r0   r6   r   boolr   r   r   r   r   ZTensorr   functionr   r   r   r   r   staticmethodr   r   r   r   r   r;   r;   r;   r<   r   =   sh   $     


7!$ V "  
 5

	
r   )*r   r   rT   r1   r+   typingr   r   r   r   utilsr   Zintegrationsr   r	   rF   r   Z
tensorflowr&   Z#tensorflow.python.distribute.valuesr
   Zmodeling_tf_utilsr   Zoptimization_tfr   r   Ztrainer_utilsr   r   r   r   r   r   Ztraining_args_tfr   r   rl   rr   Z
get_loggerr   r4   r   r;   r;   r;   r<   <module>   s*    
