Skip to content

Commit fe14c12

Browse files
jhorstmannalamb
authored andcommitted
Fix string array equality when the values buffer is the same and only the offsets to access it differ (apache#9325)
# Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. --> - Closes apache#9323. # Rationale for this change This bug seems to have existed for at least the last 5 years (before that, array equality had a lot more issues with offsets). <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> # What changes are included in this PR? <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> # Are these changes tested? Tested with unit tests. # Are there any user-facing changes? no
1 parent 9e822e0 commit fe14c12

File tree

3 files changed

+55
-12
lines changed

3 files changed

+55
-12
lines changed

arrow-array/src/array/string_array.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,4 +551,12 @@ mod tests {
551551
let err_return = array.into_builder().unwrap_err();
552552
assert_eq!(&err_return, &shared_array);
553553
}
554+
555+
#[test]
556+
fn test_non_null_string_array_equal() {
557+
let a = StringArray::from(vec![Some("ab"), Some("c")]);
558+
let b = StringArray::from(vec![Some("a"), Some("bc")]);
559+
560+
assert_ne!(a, b);
561+
}
554562
}

arrow-data/src/equal/list.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use num_integer::Integer;
2121

2222
use super::equal_range;
2323

24-
fn lengths_equal<T: ArrowNativeType + Integer>(lhs: &[T], rhs: &[T]) -> bool {
24+
pub(super) fn lengths_equal<T: ArrowNativeType + Integer>(lhs: &[T], rhs: &[T]) -> bool {
2525
// invariant from `base_equal`
2626
debug_assert_eq!(lhs.len(), rhs.len());
2727

arrow-data/src/equal/variable_size.rs

Lines changed: 46 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use super::utils::equal_len;
1819
use crate::data::{ArrayData, contains_nulls};
20+
use crate::equal::list::lengths_equal;
1921
use arrow_buffer::ArrowNativeType;
2022
use num_integer::Integer;
2123

22-
use super::utils::equal_len;
23-
2424
fn offset_value_equal<T: ArrowNativeType + Integer>(
2525
lhs_values: &[u8],
2626
rhs_values: &[u8],
@@ -63,15 +63,18 @@ pub(super) fn variable_sized_equal<T: ArrowNativeType + Integer>(
6363
// Only checking one null mask here because by the time the control flow reaches
6464
// this point, the equality of the two masks would have already been verified.
6565
if !contains_nulls(lhs.nulls(), lhs_start, len) {
66-
offset_value_equal(
67-
lhs_values,
68-
rhs_values,
69-
lhs_offsets,
70-
rhs_offsets,
71-
lhs_start,
72-
rhs_start,
73-
len,
74-
)
66+
let lhs_offsets_slice = &lhs_offsets[lhs_start..lhs_start + len + 1];
67+
let rhs_offsets_slice = &rhs_offsets[rhs_start..rhs_start + len + 1];
68+
lengths_equal(lhs_offsets_slice, rhs_offsets_slice)
69+
&& offset_value_equal(
70+
lhs_values,
71+
rhs_values,
72+
lhs_offsets,
73+
rhs_offsets,
74+
lhs_start,
75+
rhs_start,
76+
len,
77+
)
7578
} else {
7679
(0..len).all(|i| {
7780
let lhs_pos = lhs_start + i;
@@ -95,3 +98,35 @@ pub(super) fn variable_sized_equal<T: ArrowNativeType + Integer>(
9598
})
9699
}
97100
}
101+
102+
#[cfg(test)]
103+
mod tests {
104+
use crate::ArrayData;
105+
use crate::equal::variable_size::variable_sized_equal;
106+
use arrow_buffer::Buffer;
107+
use arrow_schema::DataType;
108+
109+
#[test]
110+
fn test_variable_sized_equal_diff_offsets() {
111+
let a = ArrayData::builder(DataType::Utf8)
112+
.buffers(vec![
113+
Buffer::from_vec(vec![0_i32, 3, 6]),
114+
Buffer::from_slice_ref(b"foobar"),
115+
])
116+
.null_bit_buffer(Some(Buffer::from_slice_ref([0b01_u8])))
117+
.len(2)
118+
.build()
119+
.unwrap();
120+
let b = ArrayData::builder(DataType::Utf8)
121+
.buffers(vec![
122+
Buffer::from_vec(vec![0_i32, 2, 6]),
123+
Buffer::from_slice_ref(b"foobar"),
124+
])
125+
.null_bit_buffer(Some(Buffer::from_slice_ref([0b01_u8])))
126+
.len(2)
127+
.build()
128+
.unwrap();
129+
130+
assert!(!variable_sized_equal::<i32>(&a, &b, 0, 0, 2));
131+
}
132+
}

0 commit comments

Comments
 (0)