Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,23 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None,
if type is not None and type.id == _Type_EXTENSION:
extension_type = type
type = type.storage_type
# GH-49644: when building a fixed_shape_tensor from a sequence of arrays,
# the converter only sees the flat storage type, so validate the
# tensor-specific constraints here where the type is still known.
if (isinstance(extension_type, FixedShapeTensorType)
and isinstance(obj, (list, tuple))):
if extension_type.permutation is not None:
raise NotImplementedError(
"Converting a sequence of arrays to a fixed_shape_tensor "
"with a permutation is not supported")
expected_shape = tuple(extension_type.shape)
for element in obj:
shape = getattr(element, "shape", None)
if (shape is not None and len(shape) >= 2
and tuple(shape) != expected_shape):
raise ValueError(
f"Cannot convert array of shape {tuple(shape)} to a "
f"fixed_shape_tensor of shape {expected_shape}")

if from_pandas is None:
c_from_pandas = False
Expand Down
23 changes: 20 additions & 3 deletions python/pyarrow/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -908,13 +908,30 @@ class PyListConverter : public ListConverter<T, PyConverter, PyConverterTrait> {

Status AppendNdarray(PyObject* value) {
PyArrayObject* ndarray = reinterpret_cast<PyArrayObject*>(value);
if (PyArray_NDIM(ndarray) != 1) {
return Status::Invalid("Can only convert 1-dimensional array values");
}
if (PyArray_ISBYTESWAPPED(ndarray)) {
// TODO
return Status::NotImplemented("Byte-swapped arrays not supported");
}
OwnedRef flattened;
if (PyArray_NDIM(ndarray) != 1) {
Comment thread
aboderinsamuel marked this conversation as resolved.
// GH-49644: a fixed-size list (e.g. fixed-shape-tensor storage) is built
// from a multi- or 0-dimensional array by flattening it in C order.
if (this->list_type_->id() != Type::FIXED_SIZE_LIST) {
return Status::Invalid("Can only convert 1-dimensional array values of ",
this->list_type_->ToString(), " to a variable-sized list");
}
// Get an aligned, C-contiguous array (copying only if needed).
PyObject* contiguous =
PyArray_CheckFromAny(value, nullptr, /*min_depth=*/0, /*max_depth=*/0,
NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_ALIGNED, nullptr);
RETURN_IF_PYERROR();
flattened.reset(
PyArray_Ravel(reinterpret_cast<PyArrayObject*>(contiguous), NPY_CORDER));
Comment thread
rok marked this conversation as resolved.
Py_DECREF(contiguous);
RETURN_IF_PYERROR();
value = flattened.obj();
ndarray = reinterpret_cast<PyArrayObject*>(value);
}
const int64_t size = PyArray_SIZE(ndarray);
RETURN_NOT_OK(AppendTo(this->list_type_, size));
RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size));
Expand Down
24 changes: 24 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2924,6 +2924,30 @@ def test_array_from_invalid_dim_raises():
pa.array(arr0d)


@pytest.mark.numpy
def test_fixed_size_list_from_multidim_ndarray():
    """Check conversion of non-1-D ndarrays into fixed-size list arrays."""
    fixed3 = pa.list_(pa.int64(), 3)

    # Two (1, 3) elements become two fixed-size lists of length 3.
    rows = [
        np.array([[1, 2, 3]], dtype=np.int64),
        np.array([[4, 5, 6]], dtype=np.int64),
    ]
    converted = pa.array(rows, type=fixed3)
    assert converted.type == fixed3
    assert converted.to_pylist() == [[1, 2, 3], [4, 5, 6]]

    # A (2, 2) element fills a fixed-size list of length 4 in row-major order.
    square = np.array([[1, 2], [3, 4]], dtype=np.int64)
    converted = pa.array([square], type=pa.list_(pa.int64(), 4))
    assert converted.to_pylist() == [[1, 2, 3, 4]]

    # Four values do not fit a fixed-size list of length 3.
    with pytest.raises(pa.lib.ArrowInvalid):
        pa.array([square], type=fixed3)

    # A variable-sized list type rejects the 2-D element.
    with pytest.raises(pa.lib.ArrowInvalid, match=r"array values of .*int64"):
        pa.array(
            [np.array([[1, 2, 3]], dtype=np.int64)],
            type=pa.list_(pa.int64()),
        )

    # A 0-dimensional element becomes a fixed-size list of length 1.
    scalar_like = np.array(1, dtype=np.int64)
    converted = pa.array([scalar_like], type=pa.list_(pa.int64(), 1))
    assert converted.to_pylist() == [[1]]


@pytest.mark.numpy
def test_array_from_strided_bool():
# ARROW-6325
Expand Down
66 changes: 66 additions & 0 deletions python/pyarrow/tests/test_extension_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -1730,6 +1730,72 @@ def test_tensor_array_from_numpy(np_type_str):
pa.FixedShapeTensorArray.from_numpy_ndarray(arr, dim_names=[0, 1])


@pytest.mark.numpy
@pytest.mark.parametrize("np_type_str", ("int8", "int64", "float32"))
def test_tensor_array_from_list_of_ndarrays(np_type_str):
    """Check building a fixed_shape_tensor array from a list of ndarrays."""
    np_dtype = np.dtype(np_type_str)
    value_type = pa.from_numpy_dtype(np_dtype)
    tensor_type = pa.fixed_shape_tensor(value_type, (2, 3))

    # Two (2, 3) elements holding the values 0..5 and 6..11.
    elements = [
        np.arange(start, start + 6, dtype=np_dtype).reshape(2, 3)
        for start in (0, 6)
    ]
    result = pa.array(elements, type=tensor_type)
    assert isinstance(result, pa.FixedShapeTensorArray)
    assert result.type == tensor_type
    assert len(result) == 2

    # The storage must match what from_numpy_ndarray builds from the stack.
    stacked = np.stack(elements)
    expected = pa.FixedShapeTensorArray.from_numpy_ndarray(stacked)
    assert result.storage.equals(expected.storage)

    # Every element converts back to its source array.
    for index, source in enumerate(elements):
        np.testing.assert_array_equal(result[index].to_numpy(), source)

    # A single 3-dimensional element.
    tensor_3d = pa.fixed_shape_tensor(value_type, (2, 2, 3))
    elements_3d = [np.arange(12, dtype=np_dtype).reshape(2, 2, 3)]
    result_3d = pa.array(elements_3d, type=tensor_3d)
    assert result_3d.type == tensor_3d
    np.testing.assert_array_equal(result_3d[0].to_numpy(), elements_3d[0])

    # None among the elements yields a null entry.
    result_with_null = pa.array([elements[0], None], type=tensor_type)
    assert result_with_null.null_count == 1
    assert result_with_null[1].as_py() is None

    # A (3, 2) element does not match the (2, 3) tensor shape.
    transposed_shape = np.arange(6, dtype=np_dtype).reshape(3, 2)
    with pytest.raises(ValueError, match="shape"):
        pa.array([transposed_shape], type=tensor_type)

    # Tensor types carrying a permutation are rejected.
    permuted_type = pa.fixed_shape_tensor(value_type, (2, 3), permutation=[1, 0])
    with pytest.raises(NotImplementedError, match="permutation"):
        pa.array(elements, type=permuted_type)


@pytest.mark.numpy
def test_tensor_array_from_list_mixed_layout():
    """Check that C- and F-ordered inputs give the same tensor array."""
    # Arrays holding the same values must convert identically whatever
    # their memory layout, since the values are always flattened in C order.
    tensor_type = pa.fixed_shape_tensor(pa.int64(), (2, 3))
    raw = [[1, 2, 3], [4, 5, 6]]
    c_arr, f_arr = (np.array(raw, order=layout) for layout in ("C", "F"))

    # Sanity check: equal values, different underlying byte order.
    assert np.array_equal(c_arr, f_arr)
    assert c_arr.tobytes("A") != f_arr.tobytes("A")

    same = pa.array([c_arr, c_arr], type=tensor_type)
    mixed = pa.array([c_arr, f_arr], type=tensor_type)
    assert mixed.equals(same)

    flat_row = [1, 2, 3, 4, 5, 6]
    assert mixed.storage.to_pylist() == [flat_row, flat_row]


@pytest.mark.numpy
def test_tensor_array_from_list_of_0d_arrays():
    """Check building a shape-() tensor array from 0-dimensional ndarrays."""
    tensor_type = pa.fixed_shape_tensor(pa.int64(), ())
    zero_dim_inputs = [np.array(value, dtype=np.int64) for value in (1, 2)]

    result = pa.array(zero_dim_inputs, type=tensor_type)

    assert result.type == tensor_type
    # Each 0-d element is stored as a single-value list.
    assert result.storage.to_pylist() == [[1], [2]]


@pytest.mark.numpy
@pytest.mark.parametrize("tensor_type", (
pa.fixed_shape_tensor(pa.int8(), [2, 2, 3]),
Expand Down
Loading