From ad6ed69cdaab7755b9cb3c4d95756597b9641189 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 21 Jun 2026 12:34:54 -0700
Subject: [PATCH] BUG: read_csv pyarrow engine ignored tuple names for
 MultiIndex columns

Passing tuples in ``names`` to ``read_csv`` with ``engine="pyarrow"``
produced flat columns, whereas the other engines produce MultiIndex
columns. Route the result columns through
``_maybe_make_multi_index_columns``, as the C/python engines do.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 doc/source/whatsnew/v3.1.0.rst            |  1 +
 pandas/io/parsers/arrow_parser_wrapper.py |  4 +++
 pandas/tests/io/parser/test_header.py     | 36 +++++++++++++++++++----
 3 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst
index 10f24f01e7688..7bcc547afa1b3 100644
--- a/doc/source/whatsnew/v3.1.0.rst
+++ b/doc/source/whatsnew/v3.1.0.rst
@@ -350,6 +350,7 @@ MultiIndex
 I/O
 ^^^
 - :func:`read_csv` with ``memory_map=True`` and an in-memory buffer (e.g. ``BytesIO``) now raises a clear ``ValueError`` instead of a cryptic ``UnsupportedOperation: fileno`` (:issue:`45630`)
+- Fixed bug in :func:`read_csv` with ``engine="pyarrow"`` where passing tuples in ``names`` produced flat columns instead of :class:`MultiIndex` columns (:issue:`65862`)
 - Fixed bug in :func:`read_csv` with the ``c`` engine where an embedded ``\r`` followed by a space in an unquoted field could cause an infinite re-parsing loop, producing spurious rows or a buffer overflow (:issue:`51141`)
 - Fixed bug in :func:`read_excel` where usage of ``skiprows`` could lead to an infinite loop (:issue:`64027`)
 - Fixed bug where :func:`read_html` parsed nested tables incorrectly when using ``html5lib`` or ``bs4`` flavors (:issue:`64524`)
diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 0ca0bd921c74a..d3bef60452396 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -252,6 +252,10 @@ def _finalize_pandas_output(
         frame = self._do_date_conversions(frame.columns, frame)
         frame = self._finalize_index(frame, multi_index_named)
         frame = self._finalize_dtype(frame)
+        # tuples passed via names imply MultiIndex columns, as with other engines
+        frame.columns = self._maybe_make_multi_index_columns(
+            list(frame.columns), self.col_names
+        )
         return frame
 
     def _validate_usecols(self, usecols) -> None:
diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
index eed553f6d20f6..cfe0a6196f16f 100644
--- a/pandas/tests/io/parser/test_header.py
+++ b/pandas/tests/io/parser/test_header.py
@@ -199,7 +199,6 @@ def test_header_multi_index_invalid(all_parsers, kwargs, msg):
 _TestTuple = namedtuple("_TestTuple", ["first", "second"])
 
 
-@xfail_pyarrow  # TypeError: an integer is required
 @pytest.mark.parametrize(
     "kwargs",
     [
@@ -228,8 +227,13 @@ def test_header_multi_index_invalid(all_parsers, kwargs, msg):
         },
     ],
 )
-def test_header_multi_index_common_format1(all_parsers, kwargs):
+def test_header_multi_index_common_format1(all_parsers, kwargs, request):
     parser = all_parsers
+    if parser.engine == "pyarrow" and "header" in kwargs:
+        # list-valued header is unsupported by the pyarrow engine
+        request.applymarker(
+            pytest.mark.xfail(reason="TypeError: an integer is required")
+        )
     expected = DataFrame(
         [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
         index=["one", "two"],
@@ -247,7 +251,6 @@ def test_header_multi_index_common_format1(all_parsers, kwargs):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # TypeError: an integer is required
 @pytest.mark.parametrize(
     "kwargs",
     [
@@ -276,8 +279,13 @@ def test_header_multi_index_common_format1(all_parsers, kwargs):
         },
     ],
 )
-def test_header_multi_index_common_format2(all_parsers, kwargs):
+def test_header_multi_index_common_format2(all_parsers, kwargs, request):
     parser = all_parsers
+    if parser.engine == "pyarrow" and "header" in kwargs:
+        # list-valued header is unsupported by the pyarrow engine
+        request.applymarker(
+            pytest.mark.xfail(reason="TypeError: an integer is required")
+        )
     expected = DataFrame(
         [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
         index=["one", "two"],
@@ -294,7 +302,6 @@ def test_header_multi_index_common_format2(all_parsers, kwargs):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # TypeError: an integer is required
 @pytest.mark.parametrize(
     "kwargs",
     [
@@ -323,8 +330,13 @@ def test_header_multi_index_common_format2(all_parsers, kwargs):
         },
     ],
 )
-def test_header_multi_index_common_format3(all_parsers, kwargs):
+def test_header_multi_index_common_format3(all_parsers, kwargs, request):
     parser = all_parsers
+    if parser.engine == "pyarrow" and "header" in kwargs:
+        # list-valued header is unsupported by the pyarrow engine
+        request.applymarker(
+            pytest.mark.xfail(reason="TypeError: an integer is required")
+        )
     expected = DataFrame(
         [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
         index=["one", "two"],
@@ -469,6 +481,18 @@ def test_no_header(all_parsers, kwargs, names):
     tm.assert_frame_equal(result, expected)
 
 
+def test_names_tuples_multi_index_columns(all_parsers):
+    # GH#65862 tuples passed via names produce MultiIndex columns for all engines
+    parser = all_parsers
+    data = "1,2\n3,4"
+    result = parser.read_csv(StringIO(data), names=[("a", "x"), ("b", "y")])
+    expected = DataFrame(
+        [[1, 2], [3, 4]],
+        columns=MultiIndex.from_tuples([("a", "x"), ("b", "y")]),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize("header", [["a", "b"], "string_header"])
 def test_non_int_header(all_parsers, header):
     # see gh-16338