Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ MultiIndex

I/O
^^^
- :func:`read_csv` with ``engine="pyarrow"`` now raises ``ValueError`` when ``na_filter=False`` is passed, because the pyarrow engine does not support that option, instead of silently ignoring it (:issue:`65924`)
- :func:`read_csv` with ``memory_map=True`` and an in-memory buffer (e.g. ``BytesIO``) now raises a clear ``ValueError`` instead of a cryptic ``UnsupportedOperation: fileno`` (:issue:`45630`)
- Fixed bug in :func:`read_csv` with the ``c`` engine where an embedded ``\r`` followed by a space in an unquoted field could cause an infinite re-parsing loop, producing spurious rows or a buffer overflow (:issue:`51141`)
- Fixed bug in :func:`read_excel` where usage of ``skiprows`` could lead to an infinite loop (:issue:`64027`)
Expand Down
1 change: 1 addition & 0 deletions pandas/io/parsers/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ class _Fwf_Defaults(TypedDict):
"dayfirst",
"skipinitialspace",
"low_memory",
"na_filter",
}


Expand Down
38 changes: 26 additions & 12 deletions pandas/tests/io/parser/test_na_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,17 +440,23 @@ def test_na_values_na_filter_override(
request, all_parsers, na_filter, row_data, using_infer_string
):
parser = all_parsers
if parser.engine == "pyarrow":
# mismatched dtypes in both cases, FutureWarning in the True case
if not (using_infer_string and na_filter):
mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
request.applymarker(mark)
data = """\
A,B
1,A
nan,B
3,C
"""
if parser.engine == "pyarrow":
if na_filter is False:
msg = "The 'na_filter' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), na_values=["B"], na_filter=na_filter)
return
if not using_infer_string:
# mismatched dtypes, FutureWarning
mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
request.applymarker(mark)

result = parser.read_csv(StringIO(data), na_values=["B"], na_filter=na_filter)

expected = DataFrame(row_data, columns=["A", "B"])
Expand Down Expand Up @@ -636,16 +642,18 @@ def test_empty_na_values_no_default_with_index(all_parsers):
@pytest.mark.parametrize(
"na_filter,index_data", [(False, ["", "5"]), (True, [np.nan, 5.0])]
)
def test_no_na_filter_on_index(all_parsers, na_filter, index_data, request):
def test_no_na_filter_on_index(all_parsers, na_filter, index_data):
# see gh-5239
#
# Don't parse NA-values in index unless na_filter=True
parser = all_parsers
data = "a,b,c\n1,,3\n4,5,6"

if parser.engine == "pyarrow" and na_filter is False:
mark = pytest.mark.xfail(reason="mismatched index result")
request.applymarker(mark)
msg = "The 'na_filter' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), index_col=[1], na_filter=na_filter)
return

expected = DataFrame({"a": [1, 4], "c": [3, 6]}, index=Index(index_data, name="b"))
result = parser.read_csv(StringIO(data), index_col=[1], na_filter=na_filter)
Expand All @@ -671,12 +679,18 @@ def test_na_values_with_dtype_str_and_na_filter(
):
# see gh-20377
parser = all_parsers
if parser.engine == "pyarrow" and (na_filter is False or not using_infer_string):
mark = pytest.mark.xfail(reason="mismatched shape")
request.applymarker(mark)

data = "a,b,c\n1,,3\n4,5,6"

if parser.engine == "pyarrow":
if na_filter is False:
msg = "The 'na_filter' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), na_filter=na_filter, dtype=str)
return
if not using_infer_string:
mark = pytest.mark.xfail(reason="mismatched shape")
request.applymarker(mark)

# na_filter=True --> missing value becomes NaN.
# na_filter=False --> missing value remains empty string.
empty = np.nan if na_filter else ""
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,13 @@ def test_parse_dates_empty_string(all_parsers):
# see gh-2263
parser = all_parsers
data = "Date,test\n2012-01-01,1\n,2"

if parser.engine == "pyarrow":
msg = "The 'na_filter' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), parse_dates=["Date"], na_filter=False)
return

result = parser.read_csv(StringIO(data), parse_dates=["Date"], na_filter=False)

expected = DataFrame(
Expand Down