U
    襡c$U                     @   s*  d dl mZmZmZmZmZmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z? d dl@mAZA d dlBZBd dlCmDZD d dlEZEd dlFZGd dlFmHZH d dlImJZJ dd ZKeAd	d
ZLdd ZMdd ZNdd ZOdd ZPdd ZQdd ZRdd ZSdd ZTeT  dddfddZUddfddddZVd dd!d"d#ZWd$d% ZXdfddd&d'ZYdfddd(d)ZZd*ddd+d,d-Z[d.d/ Z\d0d1 Z]dS )2    )?FunctionFunctionOptionsFunctionRegistryHashAggregateFunctionHashAggregateKernelKernelScalarAggregateFunctionScalarAggregateKernelScalarFunctionScalarKernelVectorFunctionVectorKernelArraySortOptionsAssumeTimezoneOptionsCastOptionsCountOptionsCumulativeSumOptionsDayOfWeekOptionsDictionaryEncodeOptionsElementWiseAggregateOptionsExtractRegexOptionsFilterOptionsIndexOptionsJoinOptionsMakeStructOptionsMapLookupOptionsMatchSubstringOptionsModeOptionsNullOptions
PadOptionsPartitionNthOptionsQuantileOptionsRandomOptionsRankOptionsReplaceSliceOptionsReplaceSubstringOptionsRoundOptionsRoundTemporalOptionsRoundToMultipleOptionsScalarAggregateOptionsSelectKOptionsSetLookupOptionsSliceOptionsSortOptionsSplitOptionsSplitPatternOptionsStrftimeOptionsStrptimeOptionsStructFieldOptionsTakeOptionsTDigestOptionsTrimOptionsUtf8NormalizeOptionsVarianceOptionsWeekOptionscall_functionfunction_registryget_functionlist_functions	_group_byregister_scalar_functionScalarUdfContext
Expression)
namedtupleN)dedent)_compute_docstrings)	docscrapec                 C   s   | j jS )N)_doc	arg_names)func rH   3/tmp/pip-unpacked-wheel-qoi2rb4q/pyarrow/compute.py_get_arg_namesb   s    rJ   _OptionsClassDoc)paramsc                 C   s"   | j s
d S t| j }t|d S )NZ
Parameters)__doc__rD   ZNumpyDocStringrK   )options_classdocrH   rH   rI   _scrape_options_class_doci   s    rP   c                 C   s  |j }t|j|j|j|jd| _|| _|| _g }|j	}|sZ|jdkrHdnd}d
|j|}|| d |j}|r|| d tj|j}	|td t|}
|
D ]8}|jd	krd
}nd}|| d| d |d q|d k	rt|}|rH|jD ]@}||j d|j d |jD ]}|d| d q(qnLtd|j dt t|}|j D ] }|td
|j|j qr|td|j d |td |	d k	r|d
t|	d d|| _ | S )N)namearityrN   options_required   	argumentsargumentz,Call compute function {!r} with the given {}z.

z

z.        Parameters
        ----------
        )ZvectorZscalar_aggregatez
Array-likezArray-like or scalar-likez : 
z"    Argument to compute function.
z    zOptions class z does not have a docstringz                {0} : optional
                    Parameter for {1} constructor. Either `options`
                    or `{0}` can be passed, but not both at the same time.
                z&            options : pyarrow.compute.zK, optional
                Alternative way of passing options.
            z        memory_pool : pyarrow.MemoryPool, optional
            If not passed, will allocate memory from the default memory pool.
        z
{}
 )!rE   dictrQ   rR   rN   rS   Z__arrow_compute_function____name____qualname__summaryformatappenddescriptionrC   Zfunction_doc_additionsgetrB   rJ   kindrP   rL   typedescwarningswarnRuntimeWarninginspect	signature
parametersvaluesstripjoinrM   )wrapperZexposed_namerG   rN   Zcpp_docZ
doc_piecesr\   Zarg_strr_   Zdoc_additionrF   Zarg_nameZarg_typeZoptions_class_docpsoptions_sigrH   rH   rI   _decorate_compute_functionp   sl     





 
rq   c                 C   sJ   | j j}|sd S zt | W S  tk
rD   td|t Y d S X d S )Nz!Python binding for {} not exposed)rE   rN   globalsKeyErrorrd   re   r]   rf   )rG   
class_namerH   rH   rI   _get_options_class   s    ru   c                 C   sl   |s|r(|d k	rt d| |||S |d k	rht|trD|f |S t||rR|S t d| |t|d S )NzMFunction {!r} called with both an 'options' argument and additional argumentsz-Function {!r} expected a {} parameter, got {})	TypeErrorr]   
isinstancerY   rb   )rQ   rN   optionsargskwargsrH   rH   rI   _handle_options   s*    



  r{   c                    s>   d kr d d fdd
}nd d d fdd
}|S )Nmemory_poolc                    sb    t k	r2t| kr2t d  dt| d|rTt|d trTtt|S |d | S )N takes  positional argument(s), but  were givenr   )Ellipsislenrv   rw   r@   _calllistcall)r}   ry   )rR   rG   	func_namerH   rI   rm      s    z&_make_generic_wrapper.<locals>.wrapper)r}   rx   c                    s    t k	rLt| k r2t d  dt| d| d  }|d   }nd}t|||}|rt|d trtt||S ||| S )Nr~   r   r   rH   r   )	r   r   rv   r{   rw   r@   r   r   r   )r}   rx   ry   rz   Zoption_argsrR   rG   r   rN   rH   rI   rm      s     rH   )r   rG   rN   rR   rm   rH   r   rI   _make_generic_wrapper   s    
r   c                 C   s   ddl m} g }| D ]}||||j q|D ]}||||j q0|d k	rt |}|j D ]6}|j|j	|j
fks~t|r|j|j
d}|| qd||d|j
d d ||d|j
d d t |S )Nr   )	Parameter)ra   rx   )defaultr}   )rg   r   r^   POSITIONAL_ONLYVAR_POSITIONALrh   ri   rj   ra   POSITIONAL_OR_KEYWORDKEYWORD_ONLYAssertionErrorreplace	Signature)rF   var_arg_namesrN   r   rL   rQ   rp   rn   rH   rH   rI   _make_signature  s,    

r   c                 C   sj   t |}t|}|o |d d}|r8| dg}ng }t| |||jd}t||||_t	|| ||S )N*)rR   )
ru   rJ   
startswithpoplstripr   rR   r   __signature__rq   )rQ   rG   rN   rF   Z
has_varargr   rm   rH   rH   rI   _wrap_function  s        r   c                  C   sp   t  } t }ddd}| D ]L}|||}||}|jdkrDq|| ksTt|t|| | |< | |< qdS )z
    Make global functions wrapping each compute function.

    Note that some of the automatically-generated wrappers may be overridden
    by custom versions below.
    and_or_)andorZhash_aggregateN)rr   r:   r<   r`   r;   ra   r   r   )gregZrewritesZcpp_namerQ   rG   rH   rH   rI   _make_global_functions*  s    

r   c                 C   sf   |dk	p|dk	}|r$|dk	r$t d|dkrXtjj|}|dkrNt|}n
t|}td| g|S )a  
    Cast array values to another data type. Can also be invoked as an array
    instance method.

    Parameters
    ----------
    arr : Array-like
    target_type : DataType or str
        Type to cast to
    safe : bool, default True
        Check for overflows or other unsafe conversions
    options : CastOptions, default None
        Additional checks pass by CastOptions

    Examples
    --------
    >>> from datetime import datetime
    >>> import pyarrow as pa
    >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)])
    >>> arr.type
    TimestampType(timestamp[us])

    You can use ``pyarrow.DataType`` objects to specify the target type:

    >>> cast(arr, pa.timestamp('ms'))
    <pyarrow.lib.TimestampArray object at ...>
    [
      2010-01-01 00:00:00.000,
      2015-01-01 00:00:00.000
    ]

    >>> cast(arr, pa.timestamp('ms')).type
    TimestampType(timestamp[ms])

    Alternatively, it is also supported to use the string aliases for these
    types:

    >>> arr.cast('timestamp[ms]')
    <pyarrow.lib.TimestampArray object at ...>
    [
      2010-01-01 00:00:00.000,
      2015-01-01 00:00:00.000
    ]
    >>> arr.cast('timestamp[ms]').type
    TimestampType(timestamp[ms])

    Returns
    -------
    casted : Array
    NzRMust either pass values for 'target_type' and 'safe' or pass a value for 'options'Fcast)	
ValueErrorpatypeslibensure_typer   Zunsafesafer9   )ZarrZtarget_typer   rx   Zsafe_vars_passedrH   rH   rI   r   F  s    3
r   r|   c                C   s   |dk	r.|dk	r"|  ||| } qB|  |} n|dk	rB|  d|} t|tjs`tj|| jd}n | j|jkrtj| | jd}t|d}td| g||}|dk	r| dkrtj| | t	 d}|S )a  
    Find the index of the first occurrence of a given value.

    Parameters
    ----------
    data : Array-like
    value : Scalar-like object
        The value to search for.
    start : int, optional
    end : int, optional
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    index : int
        the index, or -1 if not found
    Nr   rb   valueindex)
slicerw   r   Scalarscalarrb   as_pyr   r9   Zint64)datar   startendr}   rx   resultrH   rH   rI   r     s    
r   T)boundscheckr}   c                C   s   t |d}td| |g||S )ah  
    Select values (or records) from array- or table-like data given integer
    selection indices.

    The result will be of the same type(s) as the input, with elements taken
    from the input array (or record batch / table fields) at the given
    indices. If an index is null then the corresponding value in the output
    will be null.

    Parameters
    ----------
    data : Array, ChunkedArray, RecordBatch, or Table
    indices : Array, ChunkedArray
        Must be of integer type
    boundscheck : boolean, default True
        Whether to boundscheck the indices. If False and there is an out of
        bounds index, will likely cause the process to crash.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : depends on inputs

    Examples
    --------
    >>> import pyarrow as pa
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> indices = pa.array([0, None, 4, 3])
    >>> arr.take(indices)
    <pyarrow.lib.StringArray object at ...>
    [
      "a",
      null,
      "e",
      null
    ]
    )r   take)r3   r9   )r   indicesr   r}   rx   rH   rH   rI   r     s    '
r   c                 C   sV   t |tjtjtjfs(tj|| jd}n | j|jkrHtj| | jd}td| |gS )aQ  
    Replace each null element in values with fill_value. The fill_value must be
    the same type as values or able to be implicitly casted to the array's
    type.

    This is an alias for :func:`coalesce`.

    Parameters
    ----------
    values : Array, ChunkedArray, or Scalar-like object
        Each null element is replaced with the corresponding value
        from fill_value.
    fill_value : Array, ChunkedArray, or Scalar-like object
        If not same type as data will attempt to cast.

    Returns
    -------
    result : depends on inputs

    Examples
    --------
    >>> import pyarrow as pa
    >>> arr = pa.array([1, 2, None, 3], type=pa.int8())
    >>> fill_value = pa.scalar(5, type=pa.int8())
    >>> arr.fill_null(fill_value)
    <pyarrow.lib.Int8Array object at ...>
    [
      1,
      2,
      5,
      3
    ]
    r   Zcoalesce)	rw   r   ArrayChunkedArrayr   r   rb   r   r9   )rj   Z
fill_valuerH   rH   rI   	fill_null  s
    "r   c                C   sR   |dkrg }t | tjtjfr*|d ntdd |}t||}td| g||S )a  
    Select the indices of the top-k ordered elements from array- or table-like
    data.

    This is a specialization for :func:`select_k_unstable`. Output is not
    guaranteed to be stable.

    Parameters
    ----------
    values : Array, ChunkedArray, RecordBatch, or Table
        Data to sort and get top indices from.
    k : int
        The number of `k` elements to keep.
    sort_keys : List-like
        Column key names to order by when input is table-like data.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : Array of indices

    Examples
    --------
    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> pc.top_k_unstable(arr, k=3)
    <pyarrow.lib.UInt64Array object at ...>
    [
      5,
      4,
      2
    ]
    N)dummy
descendingc                 S   s   | dfS )Nr   rH   Zkey_namerH   rH   rI   <lambda>,      z top_k_unstable.<locals>.<lambda>select_k_unstablerw   r   r   r   r^   mapr*   r9   rj   k	sort_keysr}   rx   rH   rH   rI   top_k_unstable  s    $
r   c                C   sR   |dkrg }t | tjtjfr*|d ntdd |}t||}td| g||S )a  
    Select the indices of the bottom-k ordered elements from
    array- or table-like data.

    This is a specialization for :func:`select_k_unstable`. Output is not
    guaranteed to be stable.

    Parameters
    ----------
    values : Array, ChunkedArray, RecordBatch, or Table
        Data to sort and get bottom indices from.
    k : int
        The number of `k` elements to keep.
    sort_keys : List-like
        Column key names to order by when input is table-like data.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : Array of indices

    Examples
    --------
    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> pc.bottom_k_unstable(arr, k=3)
    <pyarrow.lib.UInt64Array object at ...>
    [
      0,
      1,
      2
    ]
    N)r   	ascendingc                 S   s   | dfS )Nr   rH   r   rH   rH   rI   r   Z  r   z#bottom_k_unstable.<locals>.<lambda>r   r   r   rH   rH   rI   bottom_k_unstable1  s    $
r   system)initializerrx   r}   c                C   s   t |d}tdg ||| dS )aB  
    Generate numbers in the range [0, 1).

    Generated values are uniformly-distributed, double-precision
    in range [0, 1). Algorithm and seed can be changed via RandomOptions.

    Parameters
    ----------
    n : int
        Number of values to generate, must be greater than or equal to 0
    initializer : int or str
        How to initialize the underlying random generator.
        If an integer is given, it is used as a seed.
        If "system" is given, the random generator is initialized with
        a system-specific source of (hopefully true) randomness.
        Other values are invalid.
    options : pyarrow.compute.RandomOptions, optional
        Alternative way of passing options.
    memory_pool : pyarrow.MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.
    )r   random)length)r"   r9   )nr   rx   r}   rH   rH   rI   r   _  s    
r   c                  G   sr   t | }|dkrdt| d ttfr0t| d S t| d trLt| d S tdt	| d  n
t| S dS )a  Reference a column of the dataset.

    Stores only the field's name. Type and other information is known only when
    the expression is bound to a dataset having an explicit scheme.

    Nested references are allowed by passing multiple names or a tuple of
    names. For example ``('foo', 'bar')`` references the field named "bar"
    inside the field named "foo".

    Parameters
    ----------
    *name_or_index : string, multiple strings, tuple or int
        The name or index of the (possibly nested) field the expression
        references to.

    Returns
    -------
    field_expr : Expression

    Examples
    --------
    >>> import pyarrow.compute as pc
    >>> pc.field("a")
    <pyarrow.compute.Expression a>
    >>> pc.field(1)
    <pyarrow.compute.Expression FieldPath(1)>
    >>> pc.field(("a", "b"))
    <pyarrow.compute.Expression FieldRef.Nested(FieldRef.Name(a) ...
    >>> pc.field("a", "b")
    <pyarrow.compute.Expression FieldRef.Nested(FieldRef.Name(a) ...
    rT   r   zCfield reference should be str, multiple str, tuple or integer, got N)
r   rw   strintr@   Z_fieldtupleZ_nested_fieldrv   rb   )Zname_or_indexr   rH   rH   rI   fieldy  s     r   c                 C   s
   t | S )a  Expression representing a scalar value.

    Parameters
    ----------
    value : bool, int, float or string
        Python value of the scalar. Note that only a subset of types are
        currently supported.

    Returns
    -------
    scalar_expr : Expression
    )r@   Z_scalarr   rH   rH   rI   r     s    r   )^Zpyarrow._computer   r   r   r   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   collectionsrA   rg   textwraprB   rd   Zpyarrowr   rC   Zpyarrow.vendoredrD   rJ   rK   rP   rq   ru   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   rH   rH   rH   rI   <module>   s8    F
SB&+*..0