U
    sVcL<                     @  s  d dl mZ d dlmZ d dlmZmZmZmZ d dl	Z	d dl
Zd dlm  mZ d dlmZmZmZmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlm Z  d dl!m"Z"m#Z# erd dl$m%Z%m&Z& G dd de"Z'dddddZ(dddddZ)dS )    )annotations)defaultdict)TYPE_CHECKINGHashableMappingSequenceN)	ArrayLikeDtypeArgDtypeObjReadCsvBuffer)DtypeWarning)find_stack_level)is_categorical_dtypepandas_dtype)union_categoricals)ExtensionDtype)ensure_index_from_sequences)
ParserBaseis_index_col)Index
MultiIndexc                      s   e Zd ZU ded< ded< ddd fdd	Zdd
ddZdd
ddZd dddddZdddddZdd Z	d!dddddZ
  ZS )"CParserWrapperbool
low_memoryzparsers.TextReader_readerzReadCsvBuffer[str]None)srcreturnc                   s  t  | | _| }|dd _ jdk	|d<  j|d<  jj	|d< dD ]}||d  qPt
|dd |d< tj|f| _ jj _ jd k} jjd krd  _n"  jj j|\ _ _ _} jd kr jr fdd	t jjD  _ntt jj _ jd d   _ jrȈ  j j jd k	sLt jd
krxt jsx  j t  jt krfdd	t! jD  _t  jt k rȈ  j  " j  #   j _ j$sn jj%dkr8t& jr8d _' ( j j\} _ _ jd kr8| _ jjd krn|sn jd k	s\td gt  j  _ jj%dk _)d S )Nr   FZallow_leading_colsusecolson_bad_lines)Zstorage_optionsencodingZ
memory_mapcompressionZerror_bad_linesZwarn_bad_linesdtypec                   s   g | ]} j  | qS  )prefix).0iselfr#   F/tmp/pip-unpacked-wheel-xj8nt62q/pandas/io/parsers/c_parser_wrapper.py
<listcomp>j   s    z+CParserWrapper.__init__.<locals>.<listcomp>stringc                   s$   g | ]\}}| ks| kr|qS r#   r#   )r%   r&   nr   r#   r)   r*      s    r   T)*super__init__kwdscopypopr   	index_colr   r   valueensure_dtype_objsgetparsersZ
TextReaderr   Zunnamed_colsnamesheaderZ_extract_multi_indexer_columnsindex_names	col_namesr$   rangeZtable_widthlist
orig_names_evaluate_usecolsAssertionErrorZusecols_dtypesetissubsetZ_validate_usecols_nameslen	enumerateZ_validate_parse_dates_presence_set_noconvert_columns_has_complex_date_colleading_colsr   Z_name_processed_clean_index_namesZ_implicit_index)r(   r   r0   keyZpassed_namesr:   	__class__)r(   r   r)   r/   1   s    


	




zCParserWrapper.__init__)r   c                 C  s(   z| j   W n tk
r"   Y nX d S N)r   close
ValueErrorr'   r#   r#   r)   rM      s    zCParserWrapper.closec                   s^   | j dk	stdd t| j D   fdd| jD }| || j}|D ]}| j| qHdS )z
        Set the columns that should not undergo dtype conversions.

        Currently, any column that is involved with date parsing will not
        undergo such conversions.
        Nc                 S  s   i | ]\}}||qS r#   r#   )r%   r&   xr#   r#   r)   
<dictcomp>   s      z9CParserWrapper._set_noconvert_columns.<locals>.<dictcomp>c                   s   g | ]} | qS r#   r#   r%   rO   Z
names_dictr#   r)   r*      s     z9CParserWrapper._set_noconvert_columns.<locals>.<listcomp>)r>   r@   rD   r8   Z_set_noconvert_dtype_columnsr   Zset_noconvert)r(   Zcol_indicesZnoconvert_columnscolr#   rR   r)   rE      s    z%CParserWrapper._set_noconvert_columnsNz
int | Nonez_tuple[Index | MultiIndex | None, Sequence[Hashable] | MultiIndex, Mapping[Hashable, ArrayLike]])nrowsr   c              
     s`  z,| j r| j|}t|}n| j|}W n tk
r   | jrd| _| | j}| j	|| j
| j| jdd\} }|  | j | jd k	r|    fdd| D }| |f Y S |    Y nX d| _| j}| jjr| jrtdg }t| jjD ]H}| j
d kr||}	n|| j
| }	| j|	|dd}	||	 qt|}| jd k	rj| |}| |}t| }
d	d t||
D }| ||\}}| || j}nt| }
| jd k	st t!| j}| |}| jd k	r| |}d
d |
D }| jd kr | "|| dd t||
D }| ||\}}| #|||\}}|||fS )NFr"   r"   c                   s   i | ]\}}| kr||qS r#   r#   )r%   kvcolumnsr#   r)   rP      s       z'CParserWrapper.read.<locals>.<dictcomp>z file structure not yet supportedT)try_parse_datesc                 S  s   i | ]\}\}}||qS r#   r#   r%   rV   r&   rW   r#   r#   r)   rP   "  s    
  c                 S  s   g | ]}|d  qS )   r#   rQ   r#   r#   r)   r*   :  s     z'CParserWrapper.read.<locals>.<listcomp>c                 S  s   i | ]\}\}}||qS r#   r#   r[   r#   r#   r)   rP   >  s    
  )$r   r   Zread_low_memory_concatenate_chunksreadStopIterationZ_first_chunkZ_maybe_dedup_namesr>   Z_get_empty_metar3   r:   r0   r6   Z_maybe_make_multi_index_columnsr;   r   _filter_usecolsitemsrM   r8   rG   rF   NotImplementedErrorr<   r2   _maybe_parse_datesappendr   sortedzipZ_do_date_conversionsr@   r=   Z_check_data_lengthZ_make_index)r(   rT   chunksdatar8   indexZcol_dictZarraysr&   valuesZ	data_tupsZcolumn_namesZ	date_dataZalldatar#   rX   r)   r^      st    







 


zCParserWrapper.readzSequence[Hashable])r8   r   c                   s@   |  | j|  d k	r<t|t kr< fddt|D }|S )Nc                   s$   g | ]\}}| ks| kr|qS r#   r#   )r%   r&   namer-   r#   r)   r*   I  s      z2CParserWrapper._filter_usecols.<locals>.<listcomp>)r?   r   rC   rD   )r(   r8   r#   r-   r)   r`   E  s    
zCParserWrapper._filter_usecolsc                 C  sH   t | jjd }d }| jjdkr@| jd k	r@| || j\}}| _||fS )Nr   )r=   r   r9   rG   r3   rH   )r(   r8   Z	idx_namesr#   r#   r)   _get_index_namesN  s     zCParserWrapper._get_index_namesTint)ri   rZ   c                 C  s   |r|  |r| |}|S rL   )Z_should_parse_datesZ
_date_conv)r(   rj   ri   rZ   r#   r#   r)   rc   Y  s    
z!CParserWrapper._maybe_parse_dates)N)T)__name__
__module____qualname____annotations__r/   rM   rE   r^   r`   rl   rc   __classcell__r#   r#   rJ   r)   r   -   s   
  k	r   zlist[dict[int, ArrayLike]]dict)rg   r   c                   s  t | d  }g }i }|D ]  fdd| D }dd |D }dd |D }t|dkrt|g }|ttkr|t  |	 }t
|rt|dd	| < qt|tr| }	|	|| < qt|| < q|rd
|}
dd|
 dg}tj|tt d |S )z
    Concatenate chunks of data read with low_memory=True.

    The tricky part is handling Categoricals, where different chunks
    may have different inferred categories.
    r   c                   s   g | ]}|  qS r#   )r2   )r%   chunkrk   r#   r)   r*   k  s     z'_concatenate_chunks.<locals>.<listcomp>c                 S  s   h | ]
}|j qS r#   rU   )r%   ar#   r#   r)   	<setcomp>m  s     z&_concatenate_chunks.<locals>.<setcomp>c                 S  s   h | ]}t |s|qS r#   )r   rQ   r#   r#   r)   rw   o  s      r\   F)Zsort_categories, z	Columns (zK) have mixed types. Specify dtype option on import or set low_memory=False.)
stacklevel)r=   keysrC   npZfind_common_typer"   objectrd   strr2   r   r   
isinstancer   Zconstruct_array_typeZ_concat_same_typeZconcatenatejoinwarningswarnr   r   )rg   r8   Zwarning_columnsresultZarrsZdtypesZnumpy_dtypesZcommon_typer"   Z
array_typeZwarning_namesZwarning_messager#   ru   r)   r]   _  s>    



r]   z*DtypeArg | dict[Hashable, DtypeArg] | Nonez*DtypeObj | dict[Hashable, DtypeObj] | None)r"   r   c                   sx   t trHt  t fdd} D ]}t| ||< q.|S t trdfddD S dk	rttS S )zc
    Ensure we have either None, a dtype object, or a dictionary mapping to
    dtype objects.
    c                     s    S rL   r#   r#   )default_dtyper#   r)   <lambda>      z#ensure_dtype_objs.<locals>.<lambda>c                   s   i | ]}|t  | qS r#   )r   )r%   rV   rU   r#   r)   rP     s      z%ensure_dtype_objs.<locals>.<dictcomp>N)r   r   r   default_factoryr{   rs   )r"   Zdtype_convertedrI   r#   )r   r"   r)   r5     s    

r5   )*
__future__r   collectionsr   typingr   r   r   r   r   Znumpyr|   Zpandas._libs.parsersZ_libsr7   Zpandas._typingr   r	   r
   r   Zpandas.errorsr   Zpandas.util._exceptionsr   Zpandas.core.dtypes.commonr   r   Zpandas.core.dtypes.concatr   Zpandas.core.dtypes.dtypesr   Zpandas.core.indexes.apir   Zpandas.io.parsers.base_parserr   r   Zpandasr   r   r   r]   r5   r#   r#   r#   r)   <module>   s(     4A