Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ public static Object getFieldValueFromIndexedRecord(
String[] path = fieldName.split("\\.");
for (int i = 0; i < path.length; i++) {
currentSchema = currentSchema.getNonNullType();
// Value navigation here can only descend through RECORD fields. Column-stats field paths
// that traverse a MAP (".key_value.key" / ".key_value.value") or ARRAY (".list.element")
// synthetic accessor, or that hit a null intermediate value, cannot be resolved to a single
// value and yield null instead of throwing. This mirrors HoodieAvroUtils.getNestedFieldVal;
// statistics for such nested leaves are still collected from the base-file (Parquet) path.
if (currentRecord == null || !currentSchema.hasFields()) {
return null;
}
Option<HoodieSchemaField> fieldOpt = currentSchema.getField(path[i]);
if (fieldOpt.isEmpty()) {
return null;
Expand All @@ -89,7 +97,7 @@ public static Object getFieldValueFromIndexedRecord(
return value;
}
currentSchema = field.schema();
currentRecord = (IndexedRecord) value;
currentRecord = value instanceof IndexedRecord ? (IndexedRecord) value : null;
}
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Stream;

import static org.apache.hudi.avro.AvroRecordContext.getFieldValueFromIndexedRecord;
Expand Down Expand Up @@ -94,11 +97,53 @@ void testGetFieldValueNested() {
@Test
void testGetFieldValueErrorCases() {
GenericRecord record = buildRecord();
// a union that is not [null, T] does not support field lookups
assertThrows(IllegalStateException.class, () -> getFieldValueFromIndexedRecord(record, "multi.sub"));
// a union that is not [null, T] cannot be navigated into; instead of throwing, the value
// navigator returns null so callers (e.g. column-stats collection) degrade gracefully,
// matching HoodieAvroUtils.getNestedFieldVal
assertNull(getFieldValueFromIndexedRecord(record, "multi.sub"));
// an empty field name is still rejected up front
assertThrows(IllegalArgumentException.class, () -> getFieldValueFromIndexedRecord(record, ""));
}

private static final Schema COMPLEX_SCHEMA = new Schema.Parser().parse(

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤖 nit: COMPLEX_SCHEMA is a bit ambiguous — "complex" could mean Avro complex types, or just a complicated schema. Something like MAP_AND_ARRAY_SCHEMA would immediately signal what's being covered here and make the constant self-documenting when a future test author scans for it.

⚠️ AI-generated; verify before applying. React 👍/👎 to flag quality.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed to MAP_AND_ARRAY_SCHEMA in 1a78da7.

"{\"type\":\"record\",\"name\":\"complex\",\"fields\":["
+ "{\"name\":\"id\",\"type\":\"int\"},"
+ "{\"name\":\"str_map\",\"type\":[\"null\",{\"type\":\"map\",\"values\":\"string\"}],\"default\":null},"
+ "{\"name\":\"int_array\",\"type\":[\"null\",{\"type\":\"array\",\"items\":\"int\"}],\"default\":null},"
+ "{\"name\":\"rec_map\",\"type\":[\"null\",{\"type\":\"map\",\"values\":{\"type\":\"record\","
+ "\"name\":\"inner\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"}]}}],\"default\":null}]}");

@Test
void testGetFieldValueMapAndArrayLeavesReturnNull() {
GenericRecord record = new GenericData.Record(COMPLEX_SCHEMA);
record.put("id", 7);
Map<Utf8, Utf8> strMap = new HashMap<>();
strMap.put(new Utf8("a"), new Utf8("v1"));
record.put("str_map", strMap);
record.put("int_array", Arrays.asList(3, 1, 2));
// rec_map left null

// top-level scalar still resolves normally
assertEquals(7, getFieldValueFromIndexedRecord(record, "id"));

// Parquet-style synthetic accessors that traverse a MAP (".key_value.key/value") or an
// ARRAY (".list.element") cannot be resolved to a single value and must return null instead
// of throwing. Regression: these previously threw
// IllegalStateException "Cannot get field from schema type: MAP" during MOR log-append
// column-stats collection. Such nested leaves still get statistics from the base-file path.
assertNull(getFieldValueFromIndexedRecord(record, "str_map.key_value.key"));
assertNull(getFieldValueFromIndexedRecord(record, "str_map.key_value.value"));
assertNull(getFieldValueFromIndexedRecord(record, "int_array.list.element"));
// a deep path descending through a MAP into a record field also degrades to null
assertNull(getFieldValueFromIndexedRecord(record, "rec_map.key_value.value.x"));

// and null when the complex field itself is absent/null
GenericRecord empty = new GenericData.Record(COMPLEX_SCHEMA);
empty.put("id", 0);
assertNull(getFieldValueFromIndexedRecord(empty, "str_map.key_value.value"));
assertNull(getFieldValueFromIndexedRecord(empty, "int_array.list.element"));
}

@Test
void testGetFieldValueAcrossEqualSchemaInstances() {
// records from different files carry equal but distinct schema instances; both must intern to
Expand Down
Loading