U
    9%e\                     @   sT   d Z ddlmZ ddlmZ ddlmZmZ ee	Z
ddiZG dd deeZd	S )
z BiT model configuration   )PretrainedConfig)logging)BackboneConfigMixin*get_aligned_output_features_output_indiceszgoogle/bit-50z=https://huggingface.co/google/bit-50/resolve/main/config.jsonc                       sb   e Zd ZdZdZddgZddgZddd	d
ddgddddgddddddddddf fdd	Z  ZS )	BitConfiga  
    This is the configuration class to store the configuration of a [`BitModel`]. It is used to instantiate an BiT
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the BiT
    [google/bit-50](https://huggingface.co/google/bit-50) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        num_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        embedding_size (`int`, *optional*, defaults to 64):
            Dimensionality (hidden size) for the embedding layer.
        hidden_sizes (`List[int]`, *optional*, defaults to `[256, 512, 1024, 2048]`):
            Dimensionality (hidden size) at each stage.
        depths (`List[int]`, *optional*, defaults to `[3, 4, 6, 3]`):
            Depth (number of layers) for each stage.
        layer_type (`str`, *optional*, defaults to `"preactivation"`):
            The layer to use, it can be either `"preactivation"` or `"bottleneck"`.
        hidden_act (`str`, *optional*, defaults to `"relu"`):
            The non-linear activation function in each block. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"`
            are supported.
        global_padding (`str`, *optional*):
            Padding strategy to use for the convolutional layers. Can be either `"valid"`, `"same"`, or `None`.
        num_groups (`int`, *optional*, defaults to `32`):
            Number of groups used for the `BitGroupNormActivation` layers.
        drop_path_rate (`float`, *optional*, defaults to 0.0):
            The drop path rate for the stochastic depth.
        embedding_dynamic_padding (`bool`, *optional*, defaults to `False`):
            Whether or not to make use of dynamic padding for the embedding layer.
        output_stride (`int`, *optional*, defaults to 32):
            The output stride of the model.
        width_factor (`int`, *optional*, defaults to 1):
            The width factor for the model.
        out_features (`List[str]`, *optional*):
            If used as backbone, list of features to output. Can be any of `"stem"`, `"stage1"`, `"stage2"`, etc.
            (depending on how many stages the model has). If unset and `out_indices` is set, will default to the
            corresponding stages. If unset and `out_indices` is unset, will default to the last stage.
        out_indices (`List[int]`, *optional*):
            If used as backbone, list of indices of features to output. Can be any of 0, 1, 2, etc. (depending on how
            many stages the model has). If unset and `out_features` is set, will default to the corresponding stages.
            If unset and `out_features` is unset, will default to the last stage.

    Example:
    ```python
    >>> from transformers import BitConfig, BitModel

    >>> # Initializing a BiT bit-50 style configuration
    >>> configuration = BitConfig()

    >>> # Initializing a model (with random weights) from the bit-50 style configuration
    >>> model = BitModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    bitZpreactivationZ
bottleneckZSAMEZVALIDr   @      i   i   i         ZreluN    g        F   c                    s   t  jf | || jkr4td| dd| j |d k	rd| | jkrT| }ntd| d|| _|| _|| _	|| _
|| _|| _|| _|| _|	| _|
| _|| _|| _dgdd td	t|d	 D  | _t||| jd
\| _| _d S )Nzlayer_type=z is not one of ,zPadding strategy z not supportedstemc                 S   s   g | ]}d | qS )Zstage ).0idxr   r   h/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/transformers/models/bit/configuration_bit.py
<listcomp>   s     z&BitConfig.__init__.<locals>.<listcomp>r   )out_featuresout_indicesstage_names)super__init__layer_types
ValueErrorjoinuppersupported_paddingnum_channelsembedding_sizehidden_sizesdepths
layer_type
hidden_actglobal_padding
num_groupsdrop_path_rateembedding_dynamic_paddingoutput_stridewidth_factorrangelenr   r   Z_out_featuresZ_out_indices)selfr   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r   r   kwargs	__class__r   r   r   \   s2    

$  zBitConfig.__init__)	__name__
__module____qualname____doc__Z
model_typer   r   r   __classcell__r   r   r/   r   r      s&   :

r   N)r4   Zconfiguration_utilsr   utilsr   Zutils.backbone_utilsr   r   Z
get_loggerr1   loggerZ!BIT_PRETRAINED_CONFIG_ARCHIVE_MAPr   r   r   r   r   <module>   s   
 