Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1418,7 +1418,24 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
if not len(values):
return np.zeros(len(self), dtype=bool)

result = pc.is_in(self._pa_array, value_set=pa.array(values))
value_set = pa.array(values, from_pandas=True)
if pa.types.is_null(value_set.type):
# GH#63304: If we have explicit pd.NA, we want to allow the comparison
# to return False (not found) rather than raising ArrowInvalid.
# However, we need to be careful not to swallow other types that might
# be inferred as null (e.g. [np.nan]) which logic elsewhere might rely
# on crashing to trigger fallback (e.g. in parsers).
has_pd_na = False
for x in values:
# GH#63304: Check for pd.NA (NAType) specifically
if isna(x) and not isinstance(x, (float, np.floating, type(None))):
has_pd_na = True
break

if has_pd_na:
value_set = value_set.cast(self._pa_array.type)

result = pc.is_in(self._pa_array, value_set=value_set)
# pyarrow 2.0.0 returned nulls, so we explicitly specify dtype to convert nulls
# to False
return np.array(result, dtype=np.bool_)
Expand Down
47 changes: 47 additions & 0 deletions pandas/tests/indexing/test_gh63304.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import pytest

import pandas as pd
import pandas._testing as tm

# Skip every test in this module unless pyarrow >= 13.0.0 can be imported.
pa = pytest.importorskip("pyarrow", minversion="13.0.0")


def test_drop_na_arrow_index():
    """Check ``DataFrame.drop(index=[pd.NA])`` on PyArrow-backed indexes.

    GH#63304: the drop must surface a ``KeyError`` when ``pd.NA`` is not in
    the index (instead of ``ArrowInvalid``), and must remove the row when
    ``pd.NA`` is in the index.
    """
    # pd.NA is absent from the index: expect KeyError, NOT ArrowInvalid.
    absent_cases = [
        ([1, 2, 3], "int64[pyarrow]"),  # integer
        (["a", "b", "c"], "string[pyarrow]"),  # string
        ([b"a", b"b", b"c"], "binary[pyarrow]"),  # binary
    ]
    for labels, dtype in absent_cases:
        frame = pd.DataFrame(
            {"A": [1, 2, 3]}, index=pd.Index(labels, dtype=dtype)
        )
        with pytest.raises(KeyError, match="not found in axis"):
            frame.drop(index=[pd.NA])

    # pd.NA IS in the index: verify that the matching row is dropped.
    present_cases = [
        (1, "int64[pyarrow]"),
        ("a", "string[pyarrow]"),
    ]
    for label, dtype in present_cases:
        frame = pd.DataFrame(
            {"A": [1, 2]}, index=pd.Index([label, pd.NA], dtype=dtype)
        )
        result = frame.drop(index=[pd.NA])
        expected = pd.DataFrame(
            {"A": [1]}, index=pd.Index([label], dtype=dtype)
        )
        tm.assert_frame_equal(result, expected)
Loading