Skip to content

Commit a452348

Browse files
BUG: Allow ExtensionArray to use void dtype (#61637)
Co-authored-by: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com>
1 parent 433b140 commit a452348

File tree

4 files changed

+95
-1
lines changed

4 files changed

+95
-1
lines changed

doc/source/whatsnew/v3.0.1.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Bug fixes
1515
^^^^^^^^^
1616
- Fixed Copy-on-Write handling in the :class:`DataFrame` constructor when passed a
1717
:class:`Series` or :class:`Index` (:issue:`63899`)
18+
- Allow :class:`.ExtensionArray` to have dtypes involving :class:`numpy.void` (:issue:`54810`)
1819

1920
.. ---------------------------------------------------------------------------
2021
.. _whatsnew_301.contributors:

pandas/core/generic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ def _validate_dtype(cls, dtype) -> DtypeObj | None:
477477
dtype = pandas_dtype(dtype)
478478

479479
# a compound dtype
480-
if dtype.kind == "V":
480+
if dtype.kind == "V" and not isinstance(dtype, ExtensionDtype):
481481
raise NotImplementedError(
482482
"compound dtypes are not implemented "
483483
f"in the {cls.__name__} constructor"

pandas/tests/extension/uuid/__init__.py

Whitespace-only changes.
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
from __future__ import annotations
2+
3+
from typing import (
4+
TYPE_CHECKING,
5+
ClassVar,
6+
Self,
7+
)
8+
from uuid import UUID
9+
10+
import numpy as np
11+
12+
from pandas.core.dtypes.dtypes import ExtensionDtype
13+
14+
import pandas as pd
15+
from pandas.core.arrays.base import ExtensionArray
16+
17+
if TYPE_CHECKING:
18+
import builtins
19+
from collections.abc import Iterable
20+
21+
from numpy.typing import NDArray
22+
23+
from pandas._typing import (
24+
Dtype,
25+
ScalarIndexer,
26+
)
27+
28+
29+
# 16 void bytes: 128 bit, every pattern valid, no funky behavior like 0 stripping.
30+
_UuidNumpyDtype = np.dtype("V16")
31+
32+
33+
class UuidDtype(ExtensionDtype):
34+
# ExtensionDtype essential API (3 class attrs and methods)
35+
36+
name: ClassVar[str] = "uuid"
37+
type: ClassVar[builtins.type[UUID]] = UUID
38+
39+
@classmethod
40+
def construct_array_type(cls) -> builtins.type[UuidExtensionArray]:
41+
return UuidExtensionArray
42+
43+
# ExtensionDtype overrides
44+
kind: ClassVar[str] = _UuidNumpyDtype.kind
45+
46+
47+
class UuidExtensionArray(ExtensionArray):
48+
# Implementation details and convenience
49+
50+
_data: NDArray[np.void]
51+
52+
def __init__(self, values: Iterable[UUID], *, copy: bool = False) -> None:
53+
self._data = np.array([x.bytes for x in values], dtype=_UuidNumpyDtype)
54+
55+
# Parts of ExtensionArray's essential API required for tests:
56+
57+
dtype: ClassVar[UuidDtype] = UuidDtype()
58+
59+
@classmethod
60+
def _from_sequence(
61+
cls,
62+
scalars: Iterable[UUID],
63+
*,
64+
dtype: Dtype | None = None,
65+
copy: bool = False,
66+
) -> Self:
67+
if dtype is None:
68+
dtype = UuidDtype()
69+
return cls(scalars, copy=copy)
70+
71+
def __getitem__(self, index: ScalarIndexer) -> UUID: # type: ignore[override]
72+
assert isinstance(index, int | np.integer)
73+
return UUID(bytes=self._data[index].tobytes())
74+
75+
def __len__(self) -> int:
76+
return len(self._data)
77+
78+
79+
def test_construct() -> None:
    """Check that UuidExtensionArray can be built from a list of valid UUIDs."""
    from uuid import uuid4

    # One all-zero UUID and one random UUID, both read back by position.
    random_uuid = uuid4()
    arr = UuidExtensionArray([UUID(int=0), random_uuid])

    assert arr[0].int == 0
    assert arr[1] == random_uuid
86+
87+
88+
def test_series() -> None:
    """Check that Series accepts an (unstructured) void ExtensionDtype."""
    from uuid import uuid4

    value = uuid4()
    ser = pd.Series([value], dtype=UuidDtype(), name="s")

    # The string form of the UUID must appear in the Series repr.
    assert str(value) in str(ser)

0 commit comments

Comments
 (0)