U
    sVcI                     @   s  d Z ddlmZ ddlmZ ddlZddlZddlm	Z	 ddl
mZmZmZ ddlmZ ejdZedd	 Zd
d Zejddddgddggdd Zejdddgdd Zdd Zedd Zdd Zedd Zejdd d!d"gid#fdd$gd!d"gd%d&fdd$gd!d"gd'd(fgd)d* Zed+d,d-gZeejd.ddd$gid/d0d1d2d3d4d5gd6d/ed7d8ed7d9ed7d:ed;d<ed=d>ed=d?gd6gd@dA Zeejd.ddd$gidd0d1d2d3d4d5gd6ded7d8ed7d9ed7d:ed;d<ed=d>ed=d?gd6gdBdC Z eejd.ddd$gidd0d1d2d3d4d5gd6ded7d8ed7d9ed7d:ed;d<ed=d>ed=d?gd6gdDdE Z!edFdG Z"edHdI Z#edJdK Z$edLdM Z%eejdNdOdPgdQdR Z&eejd.i d digdSdT Z'ejdUi dd$dd/dgfdVdWidXdYdZd[d\gfd]d!d"d^d_d`gid!d"d^d_d`gfgdadb Z(ejdd7d;gdcgddde Z)edfdg Z*eejdhdieddjdkdlgge+dmdndodpgdqfdreddjdkdlgge+dmdndsdpgdqfdteddjdkdldlgge+dmdndsdpdugdqfgdvdw Z,eejd ddggejdxddydzgdzdygdzd{ggd|d} Z-ed~d Z.edd Z/edd Z0edd Z1edd Z2edd Z3edd Z4dS )zx
Tests that the file header is properly handled or inferred
during parsing for all of the parsers defined in parsers.py
    )
namedtuple)StringION)ParserError)	DataFrameIndex
MultiIndexZpyarrow_skipc              	   C   s>   | }d}t jt|d td}|j|dgd W 5 Q R X d S )Nzbut only \d+ lines in filematchz,,
   header)pytestraises
ValueErrorr   read_csv)all_parsersparsermsgs r   F/tmp/pip-unpacked-wheel-xj8nt62q/pandas/tests/io/parser/test_header.pytest_read_with_bad_header   s
    r   c              	   C   s8   | }d}t jtdd |jt|dd W 5 Q R X d S )N$1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
zUPassing negative integer to header is invalid. For no header, use header=None insteadr   r   r   r   r   r   r   r   r   datar   r   r   test_negative_header#   s    r   r   r         c              	   C   s8   | }d}t jtdd |jt||d W 5 Q R X d S )Nz<1,2,3,4,5
        6,7,8,9,10
        11,12,13,14,15
        z8cannot specify multi-index header with negative integersr   r   r   )r   r   r   r   r   r   r    test_negative_multi_index_header2   s     r!   TFc              	   C   s<   | }d}d}t jt|d |jt||d W 5 Q R X d S )NzMyColumn
a
b
a
bz#Passing a bool to header is invalidr   r   )r   r   	TypeErrorr   r   )r   r   r   r   r   r   r   r   test_bool_header_arg@   s
    r#   c              	   C   s   | }d}t jtdd |jt|dd d}W 5 Q R X tdddd	d
gdddddgdddddggdddddgd}t || d S )Nr   FZcheck_stacklevelZField)prefixr      r      r               	   r
                  ZField0ZField1ZField2ZField3ZField4columns)tmassert_produces_warningFutureWarningr   r   r   assert_frame_equalr   r   r   resultexpectedr   r   r   test_no_header_prefixO   s    &r;   c                 C   sf   | }d}dddg}|j t||d}tdddgd	d
dgdddggdddgdddgd}t|| d S )Nzfoo,1,2,3
bar,4,5,6
baz,7,8,9
ABCnamesr&   r   r'   r   r(   r)   r*   r+   r,   foobarbazindexr3   r   r   r   r4   r7   )r   r   r   r@   r9   r:   r   r   r   test_header_with_index_col^   s    
rG   c                 C   sD   | }d}d}|j t|ddd}|j t|ddd}t|| d S )Nzggot,to,ignore,this,line
got,to,ignore,this,line
index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
z7index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
r   r   r   	index_colr   r   r4   r7   )r   r   r   Zdata2r9   r:   r   r   r   test_header_not_first_linep   s    rK   c                 C   sJ   | }t jddddd}d}|jt|ddddgddgd	}t || d S )
Nr(   r'   r   r   )Zr_idx_nlevelsZc_idx_nlevels  C0,,C_l0_g0,C_l0_g1,C_l0_g2

C1,,C_l1_g0,C_l1_g1,C_l1_g2
C2,,C_l2_g0,C_l2_g1,C_l2_g2
C3,,C_l3_g0,C_l3_g1,C_l3_g2
R0,R1,,,
R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2
R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2
R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2
R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2
R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
r   r&   rH   )r4   ZmakeCustomDataframer   r   r7   r   r   r:   r   r9   r   r   r   test_header_multi_index   s
     rN   z
kwargs,msgrI   rA   rB   zLindex_col must only contain row numbers when specifying a multi-index headerr&   )rI   r@   z9cannot specify names when specifying a multi-index header)rI   Zusecolsz;cannot specify usecols when specifying a multi-index headerc              	   C   sH   d}| }t jt|d( |jt|fdddddgi| W 5 Q R X d S )NrL   r   r   r   r&   r   r'   r   )r   kwargsr   r   r   r   r   r   test_header_multi_index_invalid   s    rP   
_TestTuplefirstsecondrO   r'   aqrU   rrU   r   btcur^   v)Zskiprowsr@   rU   rV   rX   r   r[   r\   r^   r_   ra   c                 C   sr   | }t ddddddgddd	d
ddggddgtddddddgd}d}|jt|fddi|}t|| d S )Nr&   r   r'   r   r(   r)   r*   r+   r,   r
   r-   r.   onetworT   rW   rY   rZ   r]   r`   rD   zC,a,a,a,b,c,c
,q,r,s,t,u,v
,,,,,,
one,1,2,3,4,5,6
two,7,8,9,10,11,12rI   r   r   r   from_tuplesr   r   r4   r7   r   rO   r   r:   r   r9   r   r   r   &test_header_multi_index_common_format1   s    rg   c                 C   sr   | }t ddddddgddd	d
ddggddgtddddddgd}d}|jt|fddi|}t|| d S )Nr&   r   r'   r   r(   r)   r*   r+   r,   r
   r-   r.   rb   rc   rT   rW   rY   rZ   r]   r`   rD   z<,a,a,a,b,c,c
,q,r,s,t,u,v
one,1,2,3,4,5,6
two,7,8,9,10,11,12rI   r   rd   rf   r   r   r   &test_header_multi_index_common_format2   s    rh   c                 C   s~   | }t ddddddgddd	d
ddggddgtddddddgd}|jdd}d}|jt|fdd i|}t|| d S )Nr&   r   r'   r   r(   r)   r*   r+   r,   r
   r-   r.   rb   rc   rT   rW   rY   rZ   r]   r`   rD   T)Zdrop2a,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12rI   )r   r   re   Zreset_indexr   r   r4   r7   rf   r   r   r   &test_header_multi_index_common_format3%  s    rj   c                 C   s   | }t tjdddddgdddd	d
ggddtddgtdddgdddddggdddddgdddddggddgdd}d}|jt|ddgdd}t|| d S )Nr   r'   r   r(   r)   r+   r,   r
   r-   r.   int64Zdtyper&   r*   rU   r[   r^   rX   r   r\   r_   ra   r   rV   levelscodesr@   rD   ri   rH   	r   nparrayr   r   r   r   r4   r7   rM   r   r   r   0test_header_multi_index_common_format_malformed1U  s    $
	rs   c                 C   s   | }t tjdddddgdddd	d
ggddtddgtdddgdddddggdddddgdddddggd dgdd}d}|jt|ddgdd}t|| d S )Nr   r'   r   r(   r)   r+   r,   r
   r-   r.   rk   rl   r&   r*   rU   r[   r^   rX   r   r\   r_   ra   r   rV   rm   rD   1,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12rH   rp   rM   r   r   r   0test_header_multi_index_common_format_malformed2j  s    $

ru   c              
   C   s   | }t tjddddgddddggd	d
tddgddggddgddggdtdddgddddggddddgddddggd dgdd}d}|jt|ddgddgd}t|| d S )Nr'   r   r(   r)   r,   r
   r-   r.   rk   rl   r&   r*   r   r+   r   )rn   ro   rU   r[   r^   r   r\   r_   ra   rV   rm   rD   rt   rH   )r   rq   rr   r   r   r   r4   r7   rM   r   r   r   0test_header_multi_index_common_format_malformed3  s     "	rv   c                 C   s^   | }d d gddgddgg}t ddg}t||d}d}|jt|d	dgd
}t|| d S )Nr&   r   r'   r   )rU   r<   )r[   r=   r2   za,b
A,B
,
1,2
3,4r   r   )r   re   r   r   r   r4   r7   )r   r   r   r3   r:   r9   r   r   r   "test_header_multi_index_blank_line  s    rw   zdata,header)1,2,3
4,5,6N)zfoo,bar,baz
1,2,3
4,5,6r   c                 C   sF   | }|j tddddgd}|j t|dddg|d}t|| d S )Nrx   rU   r[   r^   r?   r@   r   rJ   )r   r   r   r   r:   r9   r   r   r   !test_header_names_backward_compat  s    rz   c                 C   s6   | }t dddgd}|jtdf|}t|| d S )NrU   r[   r^   r2   za,b,cr   r   r   r4   r7   )r   rO   r   r:   r9   r   r   r   test_read_only_header_no_rows  s    r|   zkwargs,namesr%   XZX0ZX1ZX2ZX3ZX4r@   rC   ZquuxZpandac              	   C   s   | }d}t dddddgddd	d
dgdddddgg|d}d| krztjtdd  |jt|fdd i|}W 5 Q R X n|jt|fdd i|}t|| d S )Nr   r&   r   r'   r   r(   r)   r*   r+   r,   r
   r-   r.   r/   r0   r1   r2   r%   Fr$   r   )r   keysr4   r5   r6   r   r   r7   )r   rO   r@   r   r   r:   r9   r   r   r   test_no_header  s    & &r   Zstring_headerc              	   C   s<   d}d}| }t jt|d |jt||d W 5 Q R X d S )Nz*header must be integer or list of integersz1,2
3,4r   r   r   )r   r   r   r   r   r   r   r   test_non_int_header  s
    r   c                 C   sH   d}| }t ddgddgddgd}|jt|dgd}t|| d S )Nza,b,c
0,1,2
1,2,3r   r&   r   r'   )rU   r[   r^   r   r{   )r   r   r   r:   r9   r   r   r   test_singleton_header  s
    r   zdata,expectedz#A,A,A,B
one,one,one,two
0,40,34,0.1(   "   g?)r<   rb   )r<   zone.1)r<   zone.2)r=   rc   r2   z%A,A,A,B
one,one,one.1,two
0,40,34,0.1)r<   zone.1.1z/A,A,A,B,B
one,one,one.1,two,two
0,40,34,0.1,0.1)r=   ztwo.1c                 C   s*   | }|j t|ddgd}t|| d S )Nr   r&   r   rJ   )r   r   r:   r   r9   r   r   r   test_mangles_multi_index  s    )r   r3    ZUnnamedZ
NotUnnamedc                 C   s   | }ddg}|d kr,d |p"ddgd }nd dg|p>ddg d }|jt|||d}g }|d krrdddg}t|D ]2\}}	|	sd|d kr|n|d  d	}	||	 qztt|d
dg}tddgddgg|d}
t	
||
 d S )Nr   r&   ,r   z
0,1
2,3
4,5
z
,0,1
0,2,3
1,4,5
rH   z	Unnamed: Z_level_001r   r'   r   r(   r2   )joinr   r   	enumerateappendr   re   zipr   r4   r7   )r   rI   r3   r   r   r   r9   Zexp_columnsicolr:   r   r   r   test_multi_index_unnamed  s     
r   c                 C   sN   | }d}|j t|ddddgd}tddgd	d
gddgd}t|| d S )Nza, b
1,2,3
5,6,4
r   r<   r=   r>   )r   r@   r&   r(   r   r)   r'   r   )r<   r=   r>   rF   r8   r   r   r   6test_names_longer_than_header_but_equal_with_data_rowsA  s
    r   c                 C   s   | }d}d}t dddddg}tdd	d
ddgdd	d
ddgg|d}|jt|ddgd}t||jd d  |jt|ddgd}t|| d S )NzFMale, Male, Male, Female, Female
R, R, L, R, R
.86, .67, .88, .78, .81z^Male, Male, Male, Female, Female
R, R, L, R, R
.86, .67, .88, .78, .81
.86, .67, .88, .78, .82)ZMaleR) Male R)r   z L) Femaler   )r   z R.1gQ?gq=
ףp?g)\(?g(\?gQ?g=
ףp=?r2   r   r&   r   )r   re   r   r   r   r4   r7   Ziloc)r   r   s1s2mir:   Zdf1Zdf2r   r   r    test_read_csv_multiindex_columnsN  s(    	 r   c              	   C   s<   | }d}t jtdd |jt|ddgd W 5 Q R X d S )Nz1row11,row12,row13
row21,row22, row23
row31,row32
z1Header rows must have an equal number of columns.r   r   r   r   r   r   r   r   r   )r   r   Zcaser   r   r   'test_read_csv_multi_header_length_checkn  s     r   c                 C   sX   | }d}|j t|ddgd d}tdddgdtjtjgd	d
ddgd}t|| d S )Nzx,1,5
y,2
z,3
rU   r[   ry   r&   r   r'   r(   rU   r[   xyz)rE   )r   r   r   rq   nanr4   r7   r8   r   r   r   #test_header_none_and_implicit_index~  s     r   c              	   C   s>   | }d}t jtdd |jt|ddgd d W 5 Q R X d S )Nx,1
y,2,5
z,3
z"Expected 2 fields in line 2, saw 3r   rU   r[   ry   r   r   r   r   r   1test_header_none_and_implicit_index_in_second_row  s    r   c                 C   sH   | }d}|j t|ddgd dd}tddgdd	gd
}t|| d S )Nr   rU   r[   skip)r@   r   Zon_bad_linesr   r   r&   r'   r   rF   r8   r   r   r   &test_header_none_and_on_bad_lines_skip  s       r   c              	   C   sB   | }d}d}t jt|d |jt|dddgd W 5 Q R X d S )Nza,b
1,2
z;Passed header=\[0,1,2\], len of 3, but only 2 lines in filer   r   r&   r   r   r   )r   r   r   r   r   r   r   test_header_missing_rows  s
    r   )5__doc__collectionsr   ior   Znumpyrq   r   Zpandas.errorsr   Zpandasr   r   r   Zpandas._testingZ_testingr4   markZusefixturesZskip_pyarrowr   r   Zparametrizer!   r#   r;   rG   rK   rN   rP   rQ   rg   rh   rj   rs   ru   rv   rw   rz   r|   r   r   r   re   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s  
	




	







 		








& "





