U
    sVc*2                     @  s  d Z ddlmZ ddlmZmZ ddlmZmZm	Z	m
Z
mZmZ ddlZddlZddlmZ ddlmZmZmZ ddlmZ dd	lmZ dd
lmZmZmZmZmZm Z  ddl!Z"ddl#m$Z$ ddl%m&Z& erddl!m'Z'm(Z( dddddddddZ)G dd deZ*G dd de*Z+G dd de*Z,dddddZ-d d!d d"d#d$Z.d d!d d%d&d'Z/d d!d d%d(d)Z0d d!d d*d+d,Z1d dd-d.d/d0Z2dd1d2d3d4Z3dS )5z
Module responsible for execution of NDFrame.describe() method.

Method NDFrame.describe() delegates actual execution to function describe_ndframe().
    )annotations)ABCabstractmethod)TYPE_CHECKINGAnyCallableHashableSequencecastN)	Timestamp)DtypeObjNDFrameTnpt)find_stack_level)validate_percentile)is_bool_dtypeis_complex_dtypeis_datetime64_any_dtypeis_extension_array_dtypeis_numeric_dtypeis_timedelta64_dtype)concat)format_percentiles)	DataFrameSeriesr   str | Sequence[str] | Noneboolz#Sequence[float] | np.ndarray | None)objincludeexcludedatetime_is_numericpercentilesreturnc                 C  sR   t |}| jdkr&ttd| |d}nttd| |||d}|j|d}tt|S )a  Describe series or dataframe.

    Called from pandas.core.generic.NDFrame.describe()

    Parameters
    ----------
    obj: DataFrame or Series
        Either dataframe or series to be described.
    include : 'all', list-like of dtypes or None (default), optional
        A white list of data types to include in the result. Ignored for ``Series``.
    exclude : list-like of dtypes or None (default), optional,
        A black list of data types to omit from the result. Ignored for ``Series``.
    datetime_is_numeric : bool, default False
        Whether to treat datetime dtypes as numeric.
    percentiles : list-like of numbers, optional
        The percentiles to include in the output. All should fall between 0 and 1.
        The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and
        75th percentiles.

    Returns
    -------
    Dataframe or series description.
       r   r   r    r   )r   r   r   r    )r!   )refine_percentilesndimSeriesDescriberr
   DataFrameDescriberdescriber   )r   r   r   r    r!   Z	describerresult r+   8/tmp/pip-unpacked-wheel-xj8nt62q/pandas/core/describe.pydescribe_ndframe6   s    
r-   c                   @  s6   e Zd ZdZddddddZeddd	d
dZdS )NDFrameDescriberAbstractzAbstract class for describing dataframe or series.

    Parameters
    ----------
    obj : Series or DataFrame
        Object to be described.
    datetime_is_numeric : bool
        Whether to treat datetime dtypes as numeric.
    zDataFrame | Seriesr   None)r   r    r"   c                 C  s   || _ || _d S Nr$   )selfr   r    r+   r+   r,   __init__t   s    z!NDFrameDescriberAbstract.__init__Sequence[float] | np.ndarrayr!   r"   c                 C  s   dS )zDo describe either series or dataframe.

        Parameters
        ----------
        percentiles : list-like of numbers
            The percentiles to include in the output.
        Nr+   )r1   r!   r+   r+   r,   r)   x   s    z!NDFrameDescriberAbstract.describeN)__name__
__module____qualname____doc__r2   r   r)   r+   r+   r+   r,   r.   i   s   
r.   c                   @  s*   e Zd ZU dZded< dddddZdS )	r'   z2Class responsible for creating series description.r   r   r3   r4   c                 C  s   t | j| j}|| j|S r0   )select_describe_funcr   r    )r1   r!   describe_funcr+   r+   r,   r)      s
    zSeriesDescriber.describeN)r5   r6   r7   r8   __annotations__r)   r+   r+   r+   r,   r'      s   
r'   c                      sF   e Zd ZdZdddddd fddZd	dd
ddZdd Z  ZS )r(   a  Class responsible for creating dataobj description.

    Parameters
    ----------
    obj : DataFrame
        DataFrame to be described.
    include : 'all', list-like of dtypes or None
        A white list of data types to include in the result.
    exclude : list-like of dtypes or None
        A black list of data types to omit from the result.
    datetime_is_numeric : bool
        Whether to treat datetime dtypes as numeric.
    r   r   r   r/   )r   r   r   r    r"   c                  s>   || _ || _|jdkr*|jjdkr*tdt j||d d S )N   r   z+Cannot describe a DataFrame without columns)r    )r   r   r&   columnssize
ValueErrorsuperr2   )r1   r   r   r   r    	__class__r+   r,   r2      s
    zDataFrameDescriber.__init__r3   r4   c                   sn   |   }g }| D ]$\}}t|| j}|||| qt| t fdd|D ddd}|j |_|S )Nc                   s   g | ]}|j  d dqS )F)copy)Zreindex.0xZ	col_namesr+   r,   
<listcomp>   s     z/DataFrameDescriber.describe.<locals>.<listcomp>r#   F)Zaxissort)	_select_dataitemsr9   r    appendreorder_columnsr   r=   rC   )r1   r!   dataldesc_seriesr:   dr+   rG   r,   r)      s    zDataFrameDescriber.describec                 C  s   | j dkrP| jdkrPtjg}| jr,|d | jj|d}t|j	dkr| j}n<| j dkrx| jdk	rpd}t
|| j}n| jj| j | jd}|S )zSelect columns to be described.Ndatetime)r   r   allz*exclude must be None when include is 'all')r   r   )r   r   npnumberr    rL   r   Zselect_dtypeslenr=   r?   )r1   Zdefault_includerN   msgr+   r+   r,   rJ      s"    


zDataFrameDescriber._select_data)r5   r6   r7   r8   r2   r)   rJ   __classcell__r+   r+   rA   r,   r(      s   r(   zSequence[Series]zlist[Hashable])rO   r"   c                 C  sD   g }t dd | D td}|D ] }|D ]}||kr&|| q&q|S )z,Set a convenient order for rows for display.c                 s  s   | ]}|j V  qd S r0   )indexrD   r+   r+   r,   	<genexpr>   s     z"reorder_columns.<locals>.<genexpr>)key)sortedrW   rL   )rO   namesZldesc_indexesZidxnamesnamer+   r+   r,   rM      s    rM   r   zSequence[float])rQ   r!   r"   c                 C  s   ddl m} t|}ddddg| dg }|  |  |  |  g| |  | 	 g }t
| rpt }n t| rt| std}nd	}|||| j|d
S )zDescribe series containing numerical data.

    Parameters
    ----------
    series : Series
        Series to be described.
    percentiles : list-like of numbers
        The percentiles to include in the output.
    r   r   countmeanstdminmaxfloatNrZ   r_   dtype)pandasr   r   ra   rb   rc   rd   quantiletolistre   r   pdZFloat64Dtyper   r   rU   rh   r_   )rQ   r!   r   formatted_percentiles
stat_indexrR   rh   r+   r+   r,   describe_numeric_1d   s    

ro   )rN   percentiles_ignoredr"   c           
      C  s   ddddg}|   }t||dk }|dkrH|jd |jd  }}d}ntjtj }}d}|  |||g}ddlm}	 |	||| j	|d	S )
zDescribe series containing categorical data.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles_ignored : list-like of numbers
        Ignored, but in place to unify interface.
    ra   uniquetopfreqr   Nobjectr`   rg   )
value_countsrW   rZ   ilocrU   nanra   ri   r   r_   )
rN   rp   r^   	objcountscount_uniquerr   rs   rh   r*   r   r+   r+   r,   describe_categorical_1d  s    rz   c                 C  s  ddg}|   }t||dk }|  |g}d}|dkr|jd |jd  }}| jj}	|  j	d}
t
|}|jdk	r|	dk	r||	}n
||	}|dddd	g7 }|||t
|
 |	d
t
|
 |	d
g7 }n |ddg7 }|tjtjg7 }d}ddlm} |||| j|dS )zDescribe series containing timestamp data treated as categorical.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles_ignored : list-like of numbers
        Ignored, but in place to unify interface.
    ra   rq   r   Ni8rr   rs   firstlast)tzrt   r`   rg   )ru   rW   ra   rZ   rv   dtr~   Zdropnavaluesviewr   tzinfoZ
tz_convertZtz_localizerd   re   rU   rw   ri   r   r_   )rN   rp   r^   rx   ry   r*   rh   rr   rs   r~   Zasintr   r+   r+   r,   $describe_timestamp_as_categorical_1d!  s2    

r   )rN   r!   r"   c                 C  sf   ddl m} t|}dddg| dg }|  |  |  g| |  |  g }|||| j	dS )zDescribe series containing datetime64 dtype.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles : list-like of numbers
        The percentiles to include in the output.
    r   r`   ra   rb   rd   re   )rZ   r_   )
ri   r   r   ra   rb   rd   rj   rk   re   r_   )rN   r!   r   rm   rn   rR   r+   r+   r,   describe_timestamp_1dQ  s    r   r   )rN   r    r"   c                 C  sZ   t | jrtS t| rtS t| jrD|r,tS tjdt	t
 d tS nt| jrRtS tS dS )zSelect proper function for describing series based on data type.

    Parameters
    ----------
    data : Series
        Series to be described.
    datetime_is_numeric : bool
        Whether to treat datetime dtypes as numeric.
    zTreating datetime data as categorical rather than numeric in `.describe` is deprecated and will be removed in a future version of pandas. Specify `datetime_is_numeric=True` to silence this warning and adopt the future behavior now.)
stacklevelN)r   rh   rz   r   ro   r   r   warningswarnFutureWarningr   r   r   )rN   r    r+   r+   r,   r9   i  s     


r9   z%np.ndarray[Any, np.dtype[np.float64]]r4   c                 C  sv   | dkrt dddgS t| } t|  d| kr:| d t | } t | }| dk	sZtt|t| k rrt	d|S )z
    Ensure that percentiles are unique and sorted.

    Parameters
    ----------
    percentiles : list-like of numbers, optional
        The percentiles to include in the output.
    Ng      ?g      ?g      ?z%percentiles cannot contain duplicates)
rU   arraylistr   rL   Zasarrayrq   AssertionErrorrW   r?   )r!   Zunique_pctsr+   r+   r,   r%     s    


r%   )4r8   
__future__r   abcr   r   typingr   r   r   r   r	   r
   r   ZnumpyrU   Zpandas._libs.tslibsr   Zpandas._typingr   r   r   Zpandas.util._exceptionsr   Zpandas.util._validatorsr   Zpandas.core.dtypes.commonr   r   r   r   r   r   ri   rl   Zpandas.core.reshape.concatr   Zpandas.io.formats.formatr   r   r   r-   r.   r'   r(   rM   ro   rz   r   r   r9   r%   r+   r+   r+   r,   <module>   s4     	3G 0$