U
    sVc                     @  s   d Z ddlmZ ddlmZ ddlZddlmZ ddl	m
Z
mZ erPddlmZ dd	d
dddZdddddddZd	d	dddd
dddZdS )zH
Module containing utilities for NDFrame.sample() and .GroupBy.sample()
    )annotations)TYPE_CHECKINGN)lib)ABCDataFrame	ABCSeries)NDFramer   intz
np.ndarray)objaxisreturnc              
   C  s  t |tr|| j| }t |trt | tr||dkrrz| | }W qz tk
rn } ztd|W 5 d}~X Y qzX qtdntdt | tr| j}n| j	}||ddj
}t|| j| krtdt|rtd	|dk  rtd
t|}| r| }d||< |S )z
    Process and validate the `weights` argument to `NDFrame.sample` and
    `.GroupBy.sample`.

    Returns `weights` as an ndarray[np.float64], validated except for normalizing
    weights (because that must be done groupwise in groupby sampling).
    r   z+String passed to weights not a valid columnNzLStrings can only be passed to weights when sampling from rows on a DataFramez@Strings cannot be passed as weights when sampling from a Series.Zfloat64)Zdtypez5Weights and axis to be sampled must be of same lengthz*weight vector may not include `inf` valuesz.weight vector many not include negative values)
isinstancer   ZreindexZaxesstrr   KeyError
ValueErrorZ_constructorZ_constructor_slicedZ_valueslenshaper   Zhas_infsanynpisnancopy)r	   weightsr
   errfuncmissing r   6/tmp/pip-unpacked-wheel-xj8nt62q/pandas/core/sample.pypreprocess_weights   sD    	






r   z
int | Nonezfloat | Nonebool)nfracreplacer   c                 C  s   | dkr|dkrd} nx| dk	r0|dk	r0t dn^| dk	r^| dk rHt d| d dkrt dn0|dk	sjt|dkr~|s~t d|dk rt d| S )	z
    Process and validate the `n` and `frac` arguments to `NDFrame.sample` and
    `.GroupBy.sample`.

    Returns None if `frac` should be used (variable sampling sizes), otherwise returns
    the constant sampling size.
    N   z0Please enter a value for `frac` OR `n`, not bothr   z=A negative number of rows requested. Please provide `n` >= 0.z$Only integers accepted as `n` valueszJReplace has to be set to `True` when upsampling the population `frac` > 1.z@A negative number of rows requested. Please provide `frac` >= 0.)r   AssertionError)r   r   r    r   r   r   process_sampling_sizeN   s*    

r#   znp.ndarray | Nonez+np.random.RandomState | np.random.Generator)obj_lensizer    r   random_stater   c                 C  sH   |dk	r*|  }|dkr"|| }ntd|j| |||djtjddS )ac  
    Randomly sample `size` indices in `np.arange(obj_len)`

    Parameters
    ----------
    obj_len : int
        The length of the indices being considered
    size : int
        The number of values to choose
    replace : bool
        Allow or disallow sampling of the same row more than once.
    weights : np.ndarray[np.float64] or None
        If None, equal probability weighting, otherwise weights according
        to the vector normalized
    random_state: np.random.RandomState or np.random.Generator
        State used for the random sampling

    Returns
    -------
    np.ndarray[np.intp]
    Nr   z$Invalid weights: weights sum to zero)r%   r    pF)r   )sumr   choiceZastyper   Zintp)r$   r%   r    r   r&   Z
weight_sumr   r   r   samples   s    
 r*   )__doc__
__future__r   typingr   Znumpyr   Zpandas._libsr   Zpandas.core.dtypes.genericr   r   Zpandas.core.genericr   r   r#   r*   r   r   r   r   <module>   s   9%