U
    sVc*                     @  s>  d Z ddlmZ ddlmZmZmZ ddlZddlm	  m
Z
 ddlmZmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZ dd	lmZ dd
l m!Z! ddl"m#  m$Z% erddl m&Z& ddl'm(Z( e
j)Z)dZ*dddddZ+dd Z,ddddZ-ddddZ.d%dddddd d!d"Z/d#d$ Z0dS )&zM
Table Schema builders

https://specs.frictionlessdata.io/json-table-schema/
    )annotations)TYPE_CHECKINGAnycastN)DtypeObjJSONSerializable)find_stack_level)	_registry)
is_bool_dtypeis_categorical_dtypeis_datetime64_dtypeis_datetime64tz_dtypeis_extension_array_dtypeis_integer_dtypeis_numeric_dtypeis_period_dtypeis_string_dtypeis_timedelta64_dtype)CategoricalDtype)	DataFrame)Series)
MultiIndexz1.4.0r   str)xreturnc                 C  sx   t | rdS t| rdS t| r$dS t| s<t| s<t| r@dS t| rLdS t| rXdS t| rddS t	| rpdS dS dS )	a  
    Convert a NumPy / pandas type to its corresponding json_table.

    Parameters
    ----------
    x : np.dtype or ExtensionDtype

    Returns
    -------
    str
        the Table Schema data types

    Notes
    -----
    This table shows the relationship between NumPy / pandas dtypes,
    and Table Schema dtypes.

    ==============  =================
    Pandas type     Table Schema type
    ==============  =================
    int64           integer
    float64         number
    bool            boolean
    datetime64[ns]  datetime
    timedelta64[ns] duration
    object          str
    categorical     any
    =============== =================
    integerbooleannumberdatetimedurationanystringN)
r   r
   r   r   r   r   r   r   r   r   )r    r"   @/tmp/pip-unpacked-wheel-xj8nt62q/pandas/io/json/_table_schema.pyas_json_table_type1   s"    r$   c                 C  s   t j| jj rr| jj}t|dkr@| jjdkr@tjdt d n.t|dkrnt	dd |D rntjdt d | S | 
 } | jjdkrt | jj| j_n| jjpd| j_| S )z?Sets index names to 'index' for regular, or 'level_x' for Multi   indexz-Index name of 'index' is not round-trippable.)
stacklevelc                 s  s   | ]}| d V  qdS Zlevel_N
startswith.0r   r"   r"   r#   	<genexpr>l   s     z$set_default_names.<locals>.<genexpr>z<Index names beginning with 'level_' are not round-trippable.)comZall_not_noner&   nameslennamewarningswarnr   r    copynlevelsZfill_missing_names)dataZnmsr"   r"   r#   set_default_namesc   s$    r7   zdict[str, JSONSerializable])r   c                 C  s   | j }| jd krd}n| j}|t|d}t|rX|j}|j}dt|i|d< ||d< n>t|rn|jj	|d< n(t
|r|jj|d< nt|r|j|d< |S )	Nvalues)r1   typeenumconstraintsorderedfreqtzextDtype)dtyper1   r$   r   
categoriesr<   listr   r=   Zfreqstrr   r>   zoner   )Zarrr@   r1   fieldZcatsr<   r"   r"   r#   !convert_pandas_type_to_json_field{   s&    


rE   zstr | CategoricalDtypec                 C  s   | d }|dkrdS |dkr dS |dkr,dS |dkr8d	S |d
krDdS |dkr|  drfd| d  dS |  drd| d  dS dS nJ|dkrd| krd| krt| d d | d dS d| krt| d S dS td| dS )a  
    Converts a JSON field descriptor into its corresponding NumPy / pandas type

    Parameters
    ----------
    field
        A JSON field descriptor

    Returns
    -------
    dtype

    Raises
    ------
    ValueError
        If the type of the provided field is unknown or currently unsupported

    Examples
    --------
    >>> convert_json_field_to_pandas_type({"name": "an_int", "type": "integer"})
    'int64'

    >>> convert_json_field_to_pandas_type(
    ...     {
    ...         "name": "a_categorical",
    ...         "type": "any",
    ...         "constraints": {"enum": ["a", "b", "c"]},
    ...         "ordered": True,
    ...     }
    ... )
    CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)

    >>> convert_json_field_to_pandas_type({"name": "a_datetime", "type": "datetime"})
    'datetime64[ns]'

    >>> convert_json_field_to_pandas_type(
    ...     {"name": "a_datetime_with_tz", "type": "datetime", "tz": "US/Central"}
    ... )
    'datetime64[ns, US/Central]'
    r9   r!   objectr   Zint64r   Zfloat64r   boolr   timedelta64r   r>   zdatetime64[ns, ]r=   zperiod[zdatetime64[ns]r    r;   r<   r:   )rA   r<   r?   z#Unsupported or invalid field type: N)getr   registryfind
ValueError)rD   typr"   r"   r#   !convert_json_field_to_pandas_type   s6    )


 rO   TzDataFrame | SeriesrG   zbool | None)r6   r&   primary_keyversionr   c                 C  s"  |dkrt | } i }g }|r~| jjdkrntd| j| _t| jj| jjD ]"\}}t|}||d< || qHn|t| j | j	dkr| 
 D ]\}	}
|t|
 qn|t|  ||d< |r| jjr|dkr| jjdkr| jjg|d< n| jj|d< n|dk	r||d< |rt|d< |S )	aG  
    Create a Table schema from ``data``.

    Parameters
    ----------
    data : Series, DataFrame
    index : bool, default True
        Whether to include ``data.index`` in the schema.
    primary_key : bool or None, default True
        Column names to designate as the primary key.
        The default `None` will set `'primaryKey'` to the index
        level or levels if the index is unique.
    version : bool, default True
        Whether to include a field `pandas_version` with the version
        of pandas that last revised the table schema. This version
        can be different from the installed pandas version.

    Returns
    -------
    schema : dict

    Notes
    -----
    See `Table Schema
    <https://pandas.pydata.org/docs/user_guide/io.html#table-schema>`__ for
    conversion types.
    Timedeltas as converted to ISO8601 duration format with
    9 decimal places after the seconds field for nanosecond precision.

    Categoricals are converted to the `any` dtype, and use the `enum` field
    constraint to list the allowed values. The `ordered` attribute is included
    in an `ordered` field.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {'A': [1, 2, 3],
    ...      'B': ['a', 'b', 'c'],
    ...      'C': pd.date_range('2016-01-01', freq='d', periods=3),
    ...     }, index=pd.Index(range(3), name='idx'))
    >>> build_table_schema(df)
    {'fields': [{'name': 'idx', 'type': 'integer'}, {'name': 'A', 'type': 'integer'}, {'name': 'B', 'type': 'string'}, {'name': 'C', 'type': 'datetime'}], 'primaryKey': ['idx'], 'pandas_version': '1.4.0'}
    Tr%   r   r1   fieldsN
primaryKeyZpandas_version)r7   r&   r5   r   ziplevelsr/   rE   appendndimitemsZ	is_uniquer1   TABLE_SCHEMA_VERSION)r6   r&   rP   rQ   schemarR   levelr1   Z	new_fieldcolumnsr"   r"   r#   build_table_schema   s4    7

r^   c                 C  s   t | |d}dd |d d D }t|d |d| }dd	 |d d D }d
| kr`td||}d|d kr||d d }t|jjdkr|jj	dkrd|j_	ndd |jjD |j_|S )a  
    Builds a DataFrame from a given schema

    Parameters
    ----------
    json :
        A JSON table schema
    precise_float : bool
        Flag controlling precision when decoding string to double values, as
        dictated by ``read_json``

    Returns
    -------
    df : DataFrame

    Raises
    ------
    NotImplementedError
        If the JSON table schema contains either timezone or timedelta data

    Notes
    -----
        Because :func:`DataFrame.to_json` uses the string 'index' to denote a
        name-less :class:`Index`, this function sets the name of the returned
        :class:`DataFrame` to ``None`` when said string is encountered with a
        normal :class:`Index`. For a :class:`MultiIndex`, the same limitation
        applies to any strings beginning with 'level_'. Therefore, an
        :class:`Index` name of 'index'  and :class:`MultiIndex` names starting
        with 'level_' are not supported.

    See Also
    --------
    build_table_schema : Inverse function.
    pandas.read_json
    )precise_floatc                 S  s   g | ]}|d  qS r1   r"   r,   rD   r"   r"   r#   
<listcomp>_  s     z&parse_table_schema.<locals>.<listcomp>rZ   rR   r6   )columnsc                 S  s   i | ]}|d  t |qS r`   )rO   ra   r"   r"   r#   
<dictcomp>b  s    z&parse_table_schema.<locals>.<dictcomp>rH   z<table="orient" can not yet read ISO-formatted Timedelta datarS   r%   r&   Nc                 S  s   g | ]}| d rdn|qS r(   r)   r+   r"   r"   r#   rb   u  s    )
loadsr   r8   NotImplementedErrorZastypeZ	set_indexr0   r&   r/   r1   )jsonr_   tableZ	col_orderZdfZdtypesr"   r"   r#   parse_table_schema:  s(    $



ri   )TNT)1__doc__
__future__r   typingr   r   r   r2   Zpandas._libs.jsonZ_libsrg   Zpandas._typingr   r   Zpandas.util._exceptionsr   Zpandas.core.dtypes.baser	   rK   Zpandas.core.dtypes.commonr
   r   r   r   r   r   r   r   r   r   Zpandas.core.dtypes.dtypesr   Zpandasr   Zpandas.core.commoncorecommonr.   r   Zpandas.core.indexes.multir   re   rY   r$   r7   rE   rO   r^   ri   r"   r"   r"   r#   <module>   s2   02K   [