Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 123 additions & 0 deletions src/iceberg/test/resources/TableMetadataV3Valid.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
{
"format-version": 3,
"table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
"location": "s3://bucket/test/location",
"last-sequence-number": 34,
"next-row-id": 0,
"last-updated-ms": 1602638573590,
"last-column-id": 3,
"current-schema-id": 1,
"schemas": [
{
"type": "struct",
"schema-id": 0,
"fields": [
{
"id": 1,
"name": "x",
"required": true,
"type": "long"
}
]
},
{
"type": "struct",
"schema-id": 1,
"identifier-field-ids": [
1,
2
],
"fields": [
{
"id": 1,
"name": "x",
"required": true,
"type": "long"
},
{
"id": 2,
"name": "y",
"required": true,
"type": "long",
"doc": "comment"
},
{
"id": 3,
"name": "z",
"required": true,
"type": "long"
}
]
}
],
"default-spec-id": 0,
"partition-specs": [
{
"spec-id": 0,
"fields": [
{
"name": "x",
"transform": "identity",
"source-id": 1,
"field-id": 1000
}
]
}
],
"last-partition-id": 1000,
"default-sort-order-id": 3,
"sort-orders": [
{
"order-id": 3,
"fields": [
{
"transform": "identity",
"source-id": 2,
"direction": "asc",
"null-order": "nulls-first"
},
{
"transform": "bucket[4]",
"source-id": 3,
"direction": "desc",
"null-order": "nulls-last"
}
]
}
],
"properties": {},
"current-snapshot-id": 3055729675574597004,
"snapshots": [
{
"snapshot-id": 3051729675574597004,
"timestamp-ms": 1515100955770,
"sequence-number": 0,
"summary": {
"operation": "append"
},
"manifest-list": "s3://a/b/1.avro"
},
{
"snapshot-id": 3055729675574597004,
"parent-snapshot-id": 3051729675574597004,
"timestamp-ms": 1555100955770,
"sequence-number": 1,
"summary": {
"operation": "append"
},
"manifest-list": "s3://a/b/2.avro",
"schema-id": 1
}
],
"snapshot-log": [
{
"snapshot-id": 3051729675574597004,
"timestamp-ms": 1515100955770
},
{
"snapshot-id": 3055729675574597004,
"timestamp-ms": 1555100955770
}
],
"metadata-log": []
}
228 changes: 228 additions & 0 deletions src/iceberg/test/update_schema_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@

#include "iceberg/update/update_schema.h"

#include <limits>
#include <memory>
#include <vector>

#include <gtest/gtest.h>

#include "iceberg/expression/literal.h"
#include "iceberg/schema.h"
#include "iceberg/schema_field.h"
#include "iceberg/test/matchers.h"
Expand Down Expand Up @@ -82,6 +84,232 @@ TEST_F(UpdateSchemaTest, AddRequiredColumnWithAllowIncompatible) {
EXPECT_EQ(new_field.doc(), "A required string column");
}

/// Fixture for the default-value tests. Default values require a v3 table for Apply()
/// to validate successfully, so this fixture names the format-version 3 metadata
/// resource (presumably consumed by the base fixture's setup -- confirm there).
class UpdateSchemaDefaultValueTest : public UpdateSchemaTest {
 protected:
  /// File name of the table-metadata JSON resource backing this fixture.
  std::string MetadataResource() const override {
    return "TableMetadataV3Valid.json";
  }
};

// On the base fixture's table, Apply() is expected to reject a column that carries a
// default value.
TEST_F(UpdateSchemaTest, AddColumnWithDefaultValueRequiresV3) {
  ICEBERG_UNWRAP_OR_FAIL(auto pending, table_->NewUpdateSchema());
  pending->AddColumn("new_col", int32(), "An integer column", Literal::Int(42));

  // The failure is asserted on the Apply() result, by error kind and by message text.
  auto outcome = pending->Apply();
  EXPECT_THAT(outcome, IsError(ErrorKind::kInvalidSchema));
  EXPECT_THAT(outcome, HasErrorMessage("is not supported until v3"));
}

// With the v3 fixture, the default passed to AddColumn() is expected to show up as both
// the initial-default and the write-default of the new field.
TEST_F(UpdateSchemaDefaultValueTest, AddColumnWithDefaultValue) {
  ICEBERG_UNWRAP_OR_FAIL(auto pending, table_->NewUpdateSchema());
  pending->AddColumn("new_col", int32(), "An integer column", Literal::Int(42));

  ICEBERG_UNWRAP_OR_FAIL(auto applied, pending->Apply());
  ICEBERG_UNWRAP_OR_FAIL(auto lookup, applied.schema->FindFieldByName("new_col"));
  ASSERT_TRUE(lookup.has_value());

  // Null checks are ASSERTs so the dereferences that follow are never reached on null.
  const SchemaField& added = lookup->get();
  ASSERT_NE(added.initial_default(), nullptr);
  EXPECT_EQ(*added.initial_default(), Literal::Int(42));
  ASSERT_NE(added.write_default(), nullptr);
  EXPECT_EQ(*added.write_default(), Literal::Int(42));
}

// A required column that comes with a default is expected to be accepted by Apply().
TEST_F(UpdateSchemaDefaultValueTest, AddRequiredColumnWithDefaultValue) {
  // AllowIncompatibleChanges() is deliberately not called: with a default in place,
  // old rows read the initial-default instead of null.
  ICEBERG_UNWRAP_OR_FAIL(auto pending, table_->NewUpdateSchema());
  pending->AddRequiredColumn("required_col", string(), "A required string column",
                             Literal::String("n/a"));

  ICEBERG_UNWRAP_OR_FAIL(auto applied, pending->Apply());
  ICEBERG_UNWRAP_OR_FAIL(auto lookup,
                         applied.schema->FindFieldByName("required_col"));
  ASSERT_TRUE(lookup.has_value());

  // The field must stay required and carry the default in both slots.
  const SchemaField& added = lookup->get();
  EXPECT_FALSE(added.optional());
  ASSERT_NE(added.initial_default(), nullptr);
  EXPECT_EQ(*added.initial_default(), Literal::String("n/a"));
  ASSERT_NE(added.write_default(), nullptr);
  EXPECT_EQ(*added.write_default(), Literal::String("n/a"));
}

// A string default on an int column is expected to fail validation in Apply().
TEST_F(UpdateSchemaDefaultValueTest, AddColumnWithMismatchedDefaultValueFails) {
  ICEBERG_UNWRAP_OR_FAIL(auto pending, table_->NewUpdateSchema());
  pending->AddColumn("new_col", int32(), "An integer column", Literal::String("oops"));

  // Both the error kind and a fragment of the message are pinned.
  auto outcome = pending->Apply();
  EXPECT_THAT(outcome, IsError(ErrorKind::kValidationFailed));
  EXPECT_THAT(outcome, HasErrorMessage("Cannot cast default value"));
}

// A long default that does not fit an int column is expected to fail validation.
TEST_F(UpdateSchemaDefaultValueTest, AddColumnWithNarrowingDefaultValueFails) {
  // CastTo reports narrowing through AboveMax/BelowMin sentinels; those sentinels must
  // not end up stored as the column default.
  ICEBERG_UNWRAP_OR_FAIL(auto pending, table_->NewUpdateSchema());
  pending->AddColumn("new_col", int32(), "An integer column",
                     Literal::Long(std::numeric_limits<int64_t>::max()));

  auto outcome = pending->Apply();
  EXPECT_THAT(outcome, IsError(ErrorKind::kValidationFailed));
  EXPECT_THAT(outcome, HasErrorMessage("Cannot cast default value"));
}

// UpdateColumnDefault() on a column added in the same update is expected to change only
// the write-default.
TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnDefault) {
  ICEBERG_UNWRAP_OR_FAIL(auto pending, table_->NewUpdateSchema());
  pending->AddColumn("new_col", int32(), "An integer column", Literal::Int(42))
      .UpdateColumnDefault("new_col", Literal::Int(7));

  ICEBERG_UNWRAP_OR_FAIL(auto applied, pending->Apply());
  ICEBERG_UNWRAP_OR_FAIL(auto lookup, applied.schema->FindFieldByName("new_col"));
  ASSERT_TRUE(lookup.has_value());

  const SchemaField& added = lookup->get();
  // The initial-default keeps the value given at column addition (42); the
  // write-default reflects the later update (7).
  ASSERT_NE(added.initial_default(), nullptr);
  EXPECT_EQ(*added.initial_default(), Literal::Int(42));
  ASSERT_NE(added.write_default(), nullptr);
  EXPECT_EQ(*added.write_default(), Literal::Int(7));
}

// Setting a write-default on column "x", which already exists in the fixture schema,
// must still be visible after Apply().
TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnDefaultOnExistingColumn) {
  ICEBERG_UNWRAP_OR_FAIL(auto pending, table_->NewUpdateSchema());
  pending->UpdateColumnDefault("x", Literal::Long(0));

  ICEBERG_UNWRAP_OR_FAIL(auto applied, pending->Apply());
  ICEBERG_UNWRAP_OR_FAIL(auto lookup, applied.schema->FindFieldByName("x"));
  ASSERT_TRUE(lookup.has_value());

  // Only the write-default is expected to be set; the initial-default stays absent.
  const SchemaField& existing = lookup->get();
  EXPECT_EQ(existing.initial_default(), nullptr);
  ASSERT_NE(existing.write_default(), nullptr);
  EXPECT_EQ(*existing.write_default(), Literal::Long(0));
}

// std::nullopt as the new default is expected to remove the write-default, mirroring
// what passing null does in the Java API.
TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnDefaultClearsWithNullopt) {
  ICEBERG_UNWRAP_OR_FAIL(auto pending, table_->NewUpdateSchema());
  pending->AddColumn("new_col", int32(), "An integer column", Literal::Int(42))
      .UpdateColumnDefault("new_col", std::nullopt);

  ICEBERG_UNWRAP_OR_FAIL(auto applied, pending->Apply());
  ICEBERG_UNWRAP_OR_FAIL(auto lookup, applied.schema->FindFieldByName("new_col"));
  ASSERT_TRUE(lookup.has_value());

  const SchemaField& added = lookup->get();
  // The initial-default is untouched by the clear; only the write-default goes away.
  ASSERT_NE(added.initial_default(), nullptr);
  EXPECT_EQ(*added.initial_default(), Literal::Int(42));
  EXPECT_EQ(added.write_default(), nullptr);
}

// Defaults declared on a field nested inside an added struct column are expected to
// still be present on that nested field after Apply().
TEST_F(UpdateSchemaDefaultValueTest, AddNestedColumnPreservesNestedDefaults) {
  // The type of an added column is given fresh field ids; that reassignment must not
  // drop the defaults of its nested fields. The nested field is built with
  // initial-default 5 and write-default 9.
  auto struct_with_defaults = std::make_shared<StructType>(std::vector<SchemaField>{
      SchemaField(/*field_id=*/100, "inner", int32(), /*optional=*/false, /*doc=*/{},
                  std::make_shared<const Literal>(Literal::Int(5)),
                  std::make_shared<const Literal>(Literal::Int(9)))});

  ICEBERG_UNWRAP_OR_FAIL(auto pending, table_->NewUpdateSchema());
  pending->AddColumn("outer", struct_with_defaults, "A nested column");

  ICEBERG_UNWRAP_OR_FAIL(auto applied, pending->Apply());
  ICEBERG_UNWRAP_OR_FAIL(auto outer_lookup, applied.schema->FindFieldByName("outer"));
  ASSERT_TRUE(outer_lookup.has_value());

  // Step into the struct type of the added column and inspect its only field.
  const auto& applied_struct =
      internal::checked_cast<const StructType&>(*outer_lookup->get().type());
  ASSERT_EQ(applied_struct.fields().size(), 1);
  const SchemaField& nested = applied_struct.fields()[0];
  ASSERT_NE(nested.initial_default(), nullptr);
  EXPECT_EQ(*nested.initial_default(), Literal::Int(5));
  ASSERT_NE(nested.write_default(), nullptr);
  EXPECT_EQ(*nested.write_default(), Literal::Int(9));
}

// An int literal given as the default of long column "x" is expected to be stored as a
// long literal rather than rejected.
TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnDefaultCastsToColumnType) {
  ICEBERG_UNWRAP_OR_FAIL(auto pending, table_->NewUpdateSchema());
  pending->UpdateColumnDefault("x", Literal::Int(5));

  ICEBERG_UNWRAP_OR_FAIL(auto applied, pending->Apply());
  ICEBERG_UNWRAP_OR_FAIL(auto lookup, applied.schema->FindFieldByName("x"));
  ASSERT_TRUE(lookup.has_value());

  // The stored write-default must compare equal to Long(5), i.e. the column's type.
  const SchemaField& existing = lookup->get();
  ASSERT_NE(existing.write_default(), nullptr);
  EXPECT_EQ(*existing.write_default(), Literal::Long(5));
}

// RequireColumn() on a column added with a default in the same update is expected to
// pass Apply() and leave the field non-optional.
TEST_F(UpdateSchemaDefaultValueTest, RequireColumnAddedWithDefault) {
  ICEBERG_UNWRAP_OR_FAIL(auto pending, table_->NewUpdateSchema());
  pending->AddColumn("new_col", int32(), "An integer column", Literal::Int(42))
      .RequireColumn("new_col");

  ICEBERG_UNWRAP_OR_FAIL(auto applied, pending->Apply());
  ICEBERG_UNWRAP_OR_FAIL(auto lookup, applied.schema->FindFieldByName("new_col"));
  ASSERT_TRUE(lookup.has_value());
  EXPECT_FALSE(lookup->get().optional());
}

// Changing only the doc of a column added with a default is expected to leave both
// defaults as they were.
TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnDocPreservesDefaultValues) {
  ICEBERG_UNWRAP_OR_FAIL(auto pending, table_->NewUpdateSchema());
  pending->AddColumn("new_col", int32(), "An integer column", Literal::Int(42))
      .UpdateColumnDoc("new_col", "updated doc");

  ICEBERG_UNWRAP_OR_FAIL(auto applied, pending->Apply());
  ICEBERG_UNWRAP_OR_FAIL(auto lookup, applied.schema->FindFieldByName("new_col"));
  ASSERT_TRUE(lookup.has_value());

  // The doc reflects the update; the defaults still hold the value from AddColumn().
  const SchemaField& added = lookup->get();
  EXPECT_EQ(added.doc(), "updated doc");
  ASSERT_NE(added.initial_default(), nullptr);
  EXPECT_EQ(*added.initial_default(), Literal::Int(42));
  ASSERT_NE(added.write_default(), nullptr);
  EXPECT_EQ(*added.write_default(), Literal::Int(42));
}

// Promoting a column from int to long in the same update that added it is expected to
// carry both defaults along as long literals.
TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnTypePromotesDefaultValues) {
  ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema());
  update->AddColumn("new_col", int32(), "An integer column", Literal::Int(42))
      .UpdateColumn("new_col", int64());

  ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply());
  ICEBERG_UNWRAP_OR_FAIL(auto field_opt, result.schema->FindFieldByName("new_col"));
  ASSERT_TRUE(field_opt.has_value());

  const auto& field = field_opt->get();
  // Compare the type by value (its string form), the same way the decimal promotion
  // test does. type() is pointer-like, so EXPECT_EQ on the handles themselves would
  // check object identity and only pass while int64() keeps returning that object.
  EXPECT_EQ(field.type()->ToString(), int64()->ToString());
  // Both defaults were given as Int(42) and must now compare equal to Long(42).
  ASSERT_NE(field.initial_default(), nullptr);
  EXPECT_EQ(*field.initial_default(), Literal::Long(42));
  ASSERT_NE(field.write_default(), nullptr);
  EXPECT_EQ(*field.write_default(), Literal::Long(42));
}

// Widening a decimal column's precision is expected to re-type both defaults to the
// wider decimal while keeping their unscaled value.
TEST_F(UpdateSchemaDefaultValueTest, UpdateColumnTypePromotesDecimalDefault) {
  // decimal(9,2) -> decimal(18,2) is an allowed precision widening. Literal::CastTo
  // does not cast between decimal types, so the promotion of the default has to work
  // without it; the unscaled value (1234) is unchanged.
  ICEBERG_UNWRAP_OR_FAIL(auto pending, table_->NewUpdateSchema());
  pending
      ->AddColumn("new_col", decimal(9, 2), "A decimal column",
                  Literal::Decimal(1234, 9, 2))
      .UpdateColumn("new_col", decimal(18, 2));

  ICEBERG_UNWRAP_OR_FAIL(auto applied, pending->Apply());
  ICEBERG_UNWRAP_OR_FAIL(auto lookup, applied.schema->FindFieldByName("new_col"));
  ASSERT_TRUE(lookup.has_value());

  // Types are compared through their string form.
  const SchemaField& added = lookup->get();
  EXPECT_EQ(added.type()->ToString(), decimal(18, 2)->ToString());
  ASSERT_NE(added.initial_default(), nullptr);
  EXPECT_EQ(*added.initial_default(), Literal::Decimal(1234, 18, 2));
  ASSERT_NE(added.write_default(), nullptr);
  EXPECT_EQ(*added.write_default(), Literal::Decimal(1234, 18, 2));
}

TEST_F(UpdateSchemaTest, AddMultipleColumns) {
ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewUpdateSchema());
update->AddColumn("col1", int32(), "First column")
Expand Down
Loading
Loading