Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ MultiIndex
I/O
^^^
- :func:`read_csv` with ``memory_map=True`` and an in-memory buffer (e.g. ``BytesIO``) now raises a clear ``ValueError`` instead of a cryptic ``UnsupportedOperation: fileno`` (:issue:`45630`)
- Fixed bug in :func:`read_csv` with ``engine="pyarrow"`` where passing tuples in ``names`` produced flat columns instead of :class:`MultiIndex` columns (:issue:`65862`)
- Fixed bug in :func:`read_csv` with the ``c`` engine where an embedded ``\r`` followed by a space in an unquoted field could cause an infinite re-parsing loop, producing spurious rows or a buffer overflow (:issue:`51141`)
- Fixed bug in :func:`read_excel` where usage of ``skiprows`` could lead to an infinite loop (:issue:`64027`)
- Fixed bug where :func:`read_html` parsed nested tables incorrectly when using ``html5lib`` or ``bs4`` flavors (:issue:`64524`)
Expand Down
4 changes: 4 additions & 0 deletions pandas/io/parsers/arrow_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,10 @@ def _finalize_pandas_output(
frame = self._do_date_conversions(frame.columns, frame)
frame = self._finalize_index(frame, multi_index_named)
frame = self._finalize_dtype(frame)
# tuples passed via names imply MultiIndex columns, as with other engines
frame.columns = self._maybe_make_multi_index_columns(
list(frame.columns), self.col_names
)
return frame

def _validate_usecols(self, usecols) -> None:
Expand Down
36 changes: 30 additions & 6 deletions pandas/tests/io/parser/test_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,6 @@ def test_header_multi_index_invalid(all_parsers, kwargs, msg):
_TestTuple = namedtuple("_TestTuple", ["first", "second"])


@xfail_pyarrow # TypeError: an integer is required
@pytest.mark.parametrize(
"kwargs",
[
Expand Down Expand Up @@ -228,8 +227,13 @@ def test_header_multi_index_invalid(all_parsers, kwargs, msg):
},
],
)
def test_header_multi_index_common_format1(all_parsers, kwargs):
def test_header_multi_index_common_format1(all_parsers, kwargs, request):
parser = all_parsers
if parser.engine == "pyarrow" and "header" in kwargs:
# list-valued header is unsupported by the pyarrow engine
request.applymarker(
pytest.mark.xfail(reason="TypeError: an integer is required")
)
expected = DataFrame(
[[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
index=["one", "two"],
Expand All @@ -247,7 +251,6 @@ def test_header_multi_index_common_format1(all_parsers, kwargs):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # TypeError: an integer is required
@pytest.mark.parametrize(
"kwargs",
[
Expand Down Expand Up @@ -276,8 +279,13 @@ def test_header_multi_index_common_format1(all_parsers, kwargs):
},
],
)
def test_header_multi_index_common_format2(all_parsers, kwargs):
def test_header_multi_index_common_format2(all_parsers, kwargs, request):
parser = all_parsers
if parser.engine == "pyarrow" and "header" in kwargs:
# list-valued header is unsupported by the pyarrow engine
request.applymarker(
pytest.mark.xfail(reason="TypeError: an integer is required")
)
expected = DataFrame(
[[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
index=["one", "two"],
Expand All @@ -294,7 +302,6 @@ def test_header_multi_index_common_format2(all_parsers, kwargs):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # TypeError: an integer is required
@pytest.mark.parametrize(
"kwargs",
[
Expand Down Expand Up @@ -323,8 +330,13 @@ def test_header_multi_index_common_format2(all_parsers, kwargs):
},
],
)
def test_header_multi_index_common_format3(all_parsers, kwargs):
def test_header_multi_index_common_format3(all_parsers, kwargs, request):
parser = all_parsers
if parser.engine == "pyarrow" and "header" in kwargs:
# list-valued header is unsupported by the pyarrow engine
request.applymarker(
pytest.mark.xfail(reason="TypeError: an integer is required")
)
expected = DataFrame(
[[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
index=["one", "two"],
Expand Down Expand Up @@ -469,6 +481,18 @@ def test_no_header(all_parsers, kwargs, names):
tm.assert_frame_equal(result, expected)


def test_names_tuples_multi_index_columns(all_parsers):
    # GH#65862 every engine should turn tuples given via ``names`` into
    # MultiIndex columns rather than flat tuple-labelled columns
    column_tuples = [("a", "x"), ("b", "y")]
    buffer = StringIO("1,2\n3,4")

    # hand the parser its own copy so the expectation below is built from
    # an independent list, as two separate literals would be
    result = all_parsers.read_csv(buffer, names=list(column_tuples))

    expected_columns = MultiIndex.from_tuples(column_tuples)
    expected = DataFrame([[1, 2], [3, 4]], columns=expected_columns)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("header", [["a", "b"], "string_header"])
def test_non_int_header(all_parsers, header):
# see gh-16338
Expand Down