From 4f6fe12df7349ad61a91c598e17757be76c05876 Mon Sep 17 00:00:00 2001 From: Paul Teehan Date: Tue, 14 Apr 2026 18:12:44 +0200 Subject: [PATCH 1/4] Add adapter conformance test suite (identifiers, discovery, types/dialect) 67 integration tests that validate every adapter handles identifier quoting, metadata discovery, type mapping, sampling, and regex correctly. Run against all 10 adapters: 620 passed, 46 skipped, 1 failure (Redshift float synonym). Co-Authored-By: Claude Opus 4.6 (1M context) --- .../integration/test_conformance_discovery.py | 412 ++++++++++++++++++ .../test_conformance_identifiers.py | 356 +++++++++++++++ .../test_conformance_types_dialect.py | 389 +++++++++++++++++ 3 files changed, 1157 insertions(+) create mode 100644 soda-tests/tests/integration/test_conformance_discovery.py create mode 100644 soda-tests/tests/integration/test_conformance_identifiers.py create mode 100644 soda-tests/tests/integration/test_conformance_types_dialect.py diff --git a/soda-tests/tests/integration/test_conformance_discovery.py b/soda-tests/tests/integration/test_conformance_discovery.py new file mode 100644 index 000000000..790912b27 --- /dev/null +++ b/soda-tests/tests/integration/test_conformance_discovery.py @@ -0,0 +1,412 @@ +""" +Adapter Conformance Tests: Metadata Discovery + +Validates that every adapter's metadata discovery correctly: +- Filters out internal/temporary objects +- Returns accurate column type information through a full round-trip +- Maps type synonyms bidirectionally +- Reports column type parameters (precision, scale, length) + +This is the #2 source of field bugs (~20% of historical fixes). + +See: projects/enhancements/common_bugs_tests/historical-bug-analysis.md +""" + +import pytest +from helpers.data_source_test_helper import DataSourceTestHelper +from helpers.test_table import TestTableSpecification +from soda_core.common.metadata_types import ( + ColumnMetadata, + SodaDataTypeName, + SqlDataType, +) +from soda_core.common.sql_dialect import SqlDialect +from soda_core.common.statements.metadata_tables_query import TableType +from soda_core.common.statements.table_types import ( + FullyQualifiedTableName, + FullyQualifiedViewName, +) + + +# --------------------------------------------------------------------------- +# Test tables +# --------------------------------------------------------------------------- + +# A table with all Soda data types to exercise the full type mapping round-trip. +all_types_table = ( + TestTableSpecification.builder() + .table_purpose("conf_discovery_types") + .column_varchar("col_varchar") + .column_text("col_text") + .column_integer("col_integer") + .column_bigint("col_bigint") + .column_smallint("col_smallint") + .column_float("col_float") + .column_double("col_double") + .column_boolean("col_boolean") + .column_date("col_date") + .column_timestamp("col_timestamp") + .column_timestamp_tz("col_timestamp_tz") + .column_numeric("col_numeric") + .column_decimal("col_decimal") + .column_char("col_char") + .column_time("col_time") + .build() +) + +# A table with specific type parameters to test precision/scale/length discovery. +typed_params_table = ( + TestTableSpecification.builder() + .table_purpose("conf_discovery_params") + .column_varchar("varchar_100", character_maximum_length=100) + .column_char("char_10", character_maximum_length=10) + .column_numeric("numeric_18_4", numeric_precision=18, numeric_scale=4) + .column_decimal("decimal_10_2", numeric_precision=10, numeric_scale=2) + .column_timestamp("ts_precision_3", datetime_precision=3) + .column_timestamp_tz("ts_tz_precision_6", datetime_precision=6) + .build() +) + +# Simple table for discovery filtering tests. +simple_table = ( + TestTableSpecification.builder() + .table_purpose("conf_discovery_filter") + .column_varchar("name") + .column_integer("value") + .rows( + [ + ("alpha", 1), + ("bravo", 2), + ] + ) + .build() +) + + +# --------------------------------------------------------------------------- +# Internal object filtering +# --------------------------------------------------------------------------- + + +def test_discovery_excludes_soda_internal_tables(data_source_test_helper: DataSourceTestHelper): + """Metadata discovery must not return __soda_temp* or other internal tables. + + Historical bug: commit a16b99c8 — __soda_temp tables were appearing in discovery results. + """ + test_table = data_source_test_helper.ensure_test_table(simple_table) + + metadata_query = data_source_test_helper.data_source_impl.create_metadata_tables_query() + results = metadata_query.execute( + database_name=data_source_test_helper.extract_database_from_prefix(), + schema_name=data_source_test_helper.extract_schema_from_prefix(), + ) + + internal_tables = [] + for entry in results: + name = None + if isinstance(entry, FullyQualifiedTableName): + name = entry.table_name + elif isinstance(entry, FullyQualifiedViewName): + name = entry.view_name + if name and name.lower().startswith("__soda"): + internal_tables.append(name) + + assert internal_tables == [], ( + f"Internal Soda tables leaked into discovery results: {internal_tables}" + ) + + +def test_discovery_finds_test_table(data_source_test_helper: DataSourceTestHelper): + """Verify that a newly created table IS discoverable via metadata query.""" + test_table = data_source_test_helper.ensure_test_table(simple_table) + + metadata_query = data_source_test_helper.data_source_impl.create_metadata_tables_query() + results = metadata_query.execute( + database_name=data_source_test_helper.extract_database_from_prefix(), + schema_name=data_source_test_helper.extract_schema_from_prefix(), + include_table_name_like_filters=[f"{test_table.unique_name}"], + ) + + table_names = [ + entry.table_name.lower() + for entry in results + if isinstance(entry, FullyQualifiedTableName) + ] + assert test_table.unique_name.lower() in table_names, ( + f"Test table {test_table.unique_name} not found in discovery. Found: {table_names}" + ) + + +# --------------------------------------------------------------------------- +# View discovery via contract +# --------------------------------------------------------------------------- + + +def test_view_contract_check_end_to_end(data_source_test_helper: DataSourceTestHelper): + """Run a full contract check (row_count + missing) against a view, not just metadata.""" + if not data_source_test_helper.data_source_impl.sql_dialect.supports_views(): + pytest.skip("Views not supported") + + test_table = data_source_test_helper.ensure_test_table(simple_table) + view_table = data_source_test_helper.create_view_from_test_table(test_table) + + data_source_test_helper.assert_contract_pass( + test_table=view_table, + contract_yaml_str=""" + columns: + - name: name + checks: + - missing: + checks: + - row_count: + threshold: + must_be: 2 + """, + ) + + +def test_materialized_view_contract_check_end_to_end(data_source_test_helper: DataSourceTestHelper): + """Run a full contract check against a materialized view.""" + if not data_source_test_helper.data_source_impl.sql_dialect.supports_materialized_views(): + pytest.skip("Materialized views not supported") + + test_table = data_source_test_helper.ensure_test_table(simple_table) + mv_table = data_source_test_helper.create_materialized_view_from_test_table(test_table) + + data_source_test_helper.assert_contract_pass( + test_table=mv_table, + contract_yaml_str=""" + columns: + - name: name + checks: + - missing: + checks: + - row_count: + threshold: + must_be: 2 + """, + ) + + +# --------------------------------------------------------------------------- +# Type mapping round-trip: create → discover → map back to SodaDataTypeName +# --------------------------------------------------------------------------- + +# Expected SodaDataTypeName for each column in all_types_table. +EXPECTED_TYPE_MAP = { + "col_varchar": SodaDataTypeName.VARCHAR, + "col_text": SodaDataTypeName.TEXT, + "col_integer": SodaDataTypeName.INTEGER, + "col_bigint": SodaDataTypeName.BIGINT, + "col_smallint": SodaDataTypeName.SMALLINT, + "col_float": SodaDataTypeName.FLOAT, + "col_double": SodaDataTypeName.DOUBLE, + "col_boolean": SodaDataTypeName.BOOLEAN, + "col_date": SodaDataTypeName.DATE, + "col_timestamp": SodaDataTypeName.TIMESTAMP, + "col_timestamp_tz": SodaDataTypeName.TIMESTAMP_TZ, + "col_numeric": SodaDataTypeName.NUMERIC, + "col_decimal": SodaDataTypeName.DECIMAL, + "col_char": SodaDataTypeName.CHAR, + "col_time": SodaDataTypeName.TIME, +} + + +def test_all_types_round_trip(data_source_test_helper: DataSourceTestHelper): + """Every SodaDataTypeName must survive a create→discover→map-back round-trip. + + Tighter than test_soda_data_types.py: this test asserts the exact expected + SodaDataTypeName (with synonym awareness) for each column, not just that a + mapping exists. + """ + test_table = data_source_test_helper.ensure_test_table(all_types_table) + sql_dialect: SqlDialect = data_source_test_helper.data_source_impl.sql_dialect + + actual_columns: list[ColumnMetadata] = data_source_test_helper.data_source_impl.get_columns_metadata( + dataset_prefixes=test_table.dataset_prefix, + dataset_name=test_table.unique_name, + ) + + assert len(actual_columns) == len(EXPECTED_TYPE_MAP), ( + f"Column count mismatch: expected {len(EXPECTED_TYPE_MAP)}, got {len(actual_columns)}" + ) + + reverse_map = sql_dialect.get_soda_data_type_name_by_data_source_data_type_names() + + for col in actual_columns: + col_name = col.column_name.lower() + expected_soda_type = EXPECTED_TYPE_MAP.get(col_name) + assert expected_soda_type is not None, f"Unexpected column in metadata: {col_name}" + + ds_type_name = col.sql_data_type.name + actual_soda_type = reverse_map.get(ds_type_name) + assert actual_soda_type is not None, ( + f"Column '{col_name}': data source type '{ds_type_name}' has no reverse mapping" + ) + assert sql_dialect.is_same_soda_data_type_with_synonyms(expected_soda_type, actual_soda_type), ( + f"Column '{col_name}': expected SodaDataType {expected_soda_type}, " + f"got {actual_soda_type} (from DS type '{ds_type_name}')" + ) + + +# --------------------------------------------------------------------------- +# Type synonym bidirectionality +# --------------------------------------------------------------------------- + + +def test_type_synonyms_are_bidirectional(data_source_test_helper: DataSourceTestHelper): + """For each data source type synonym, both the canonical and synonym names + must map to the same SodaDataTypeName through the reverse mapping. + + This catches silent bugs where a type synonym is defined but the reverse + mapping only recognizes the canonical form. + """ + sql_dialect: SqlDialect = data_source_test_helper.data_source_impl.sql_dialect + synonym_lists = sql_dialect._get_data_type_name_synonyms() + reverse_map = sql_dialect.get_soda_data_type_name_by_data_source_data_type_names() + + mismatches = [] + for synonym_group in synonym_lists: + # All names in a synonym group should resolve to the same SodaDataTypeName + resolved = {} + for type_name in synonym_group: + soda_type = reverse_map.get(type_name.lower()) or reverse_map.get(type_name) + if soda_type is not None: + resolved[type_name] = soda_type + + if len(resolved) < 2: + # Only one or zero names in this group have a reverse mapping — skip + continue + + soda_types = set(resolved.values()) + # Allow synonym-aware comparison: all resolved types should be considered equivalent + canonical = next(iter(soda_types)) + for type_name, soda_type in resolved.items(): + if not sql_dialect.is_same_soda_data_type_with_synonyms(canonical, soda_type): + mismatches.append( + f"Synonym group {synonym_group}: '{type_name}' maps to {soda_type}, " + f"but others map to {canonical}" + ) + + assert mismatches == [], ( + f"Type synonym bidirectionality broken:\n" + "\n".join(mismatches) + ) + + +# --------------------------------------------------------------------------- +# Column type parameters: precision, scale, length +# --------------------------------------------------------------------------- + + +def test_column_type_parameters_preserved(data_source_test_helper: DataSourceTestHelper): + """Column type parameters (length, precision, scale, datetime precision) must + survive the create→discover round-trip for adapters that support them.""" + test_table = data_source_test_helper.ensure_test_table(typed_params_table) + sql_dialect: SqlDialect = data_source_test_helper.data_source_impl.sql_dialect + + actual_columns: list[ColumnMetadata] = data_source_test_helper.data_source_impl.get_columns_metadata( + dataset_prefixes=test_table.dataset_prefix, + dataset_name=test_table.unique_name, + ) + + cols_by_name = {c.column_name.lower(): c for c in actual_columns} + + # character_maximum_length + if sql_dialect.supports_data_type_character_maximum_length(): + varchar_col = cols_by_name.get("varchar_100") + assert varchar_col is not None, "Column varchar_100 not found" + if varchar_col.sql_data_type.character_maximum_length is not None: + assert varchar_col.sql_data_type.character_maximum_length == 100, ( + f"varchar_100: expected length 100, got {varchar_col.sql_data_type.character_maximum_length}" + ) + + char_col = cols_by_name.get("char_10") + assert char_col is not None, "Column char_10 not found" + if char_col.sql_data_type.character_maximum_length is not None: + assert char_col.sql_data_type.character_maximum_length == 10, ( + f"char_10: expected length 10, got {char_col.sql_data_type.character_maximum_length}" + ) + + # numeric_precision and numeric_scale + if sql_dialect.supports_data_type_numeric_precision(): + numeric_col = cols_by_name.get("numeric_18_4") + assert numeric_col is not None, "Column numeric_18_4 not found" + if numeric_col.sql_data_type.numeric_precision is not None: + assert numeric_col.sql_data_type.numeric_precision == 18, ( + f"numeric_18_4: expected precision 18, got {numeric_col.sql_data_type.numeric_precision}" + ) + + decimal_col = cols_by_name.get("decimal_10_2") + assert decimal_col is not None, "Column decimal_10_2 not found" + if decimal_col.sql_data_type.numeric_precision is not None: + assert decimal_col.sql_data_type.numeric_precision == 10, ( + f"decimal_10_2: expected precision 10, got {decimal_col.sql_data_type.numeric_precision}" + ) + + if sql_dialect.supports_data_type_numeric_scale(): + numeric_col = cols_by_name.get("numeric_18_4") + assert numeric_col is not None, "Column numeric_18_4 not found" + if numeric_col.sql_data_type.numeric_scale is not None: + assert numeric_col.sql_data_type.numeric_scale == 4, ( + f"numeric_18_4: expected scale 4, got {numeric_col.sql_data_type.numeric_scale}" + ) + + decimal_col = cols_by_name.get("decimal_10_2") + assert decimal_col is not None, "Column decimal_10_2 not found" + if decimal_col.sql_data_type.numeric_scale is not None: + assert decimal_col.sql_data_type.numeric_scale == 2, ( + f"decimal_10_2: expected scale 2, got {decimal_col.sql_data_type.numeric_scale}" + ) + + # datetime_precision + if sql_dialect.supports_data_type_datetime_precision(): + ts_col = cols_by_name.get("ts_precision_3") + assert ts_col is not None, "Column ts_precision_3 not found" + if ts_col.sql_data_type.datetime_precision is not None: + assert ts_col.sql_data_type.datetime_precision == 3, ( + f"ts_precision_3: expected datetime_precision 3, got {ts_col.sql_data_type.datetime_precision}" + ) + + ts_tz_col = cols_by_name.get("ts_tz_precision_6") + assert ts_tz_col is not None, "Column ts_tz_precision_6 not found" + if ts_tz_col.sql_data_type.datetime_precision is not None: + assert ts_tz_col.sql_data_type.datetime_precision == 6, ( + f"ts_tz_precision_6: expected datetime_precision 6, got {ts_tz_col.sql_data_type.datetime_precision}" + ) + + +# --------------------------------------------------------------------------- +# Every SodaDataTypeName has both forward and reverse mappings +# --------------------------------------------------------------------------- + + +def test_every_soda_type_has_forward_mapping(data_source_test_helper: DataSourceTestHelper): + """Every SodaDataTypeName must have a forward mapping (Soda→data source).""" + forward_map = ( + data_source_test_helper.data_source_impl.sql_dialect + .get_data_source_data_type_name_by_soda_data_type_names() + ) + unmapped = [str(t) for t in SodaDataTypeName if t not in forward_map] + assert unmapped == [], f"SodaDataTypeNames with no forward mapping: {unmapped}" + + +def test_every_forward_mapped_type_has_reverse(data_source_test_helper: DataSourceTestHelper): + """Every data source type produced by the forward mapping must have a reverse mapping.""" + sql_dialect = data_source_test_helper.data_source_impl.sql_dialect + forward_map = sql_dialect.get_data_source_data_type_name_by_soda_data_type_names() + reverse_map = sql_dialect.get_soda_data_type_name_by_data_source_data_type_names() + + unmapped = [] + for soda_type, ds_type in forward_map.items(): + ds_type_lower = ds_type.lower() if isinstance(ds_type, str) else ds_type + if ds_type not in reverse_map and ds_type_lower not in reverse_map: + # Check synonyms + canonical = sql_dialect._data_type_name_synonym_mappings.get( + ds_type_lower, ds_type_lower + ) + if canonical not in reverse_map: + unmapped.append(f"{soda_type} → '{ds_type}' (no reverse)") + + assert unmapped == [], ( + f"Forward-mapped types with no reverse mapping:\n" + "\n".join(unmapped) + ) diff --git a/soda-tests/tests/integration/test_conformance_identifiers.py b/soda-tests/tests/integration/test_conformance_identifiers.py new file mode 100644 index 000000000..5f346deb3 --- /dev/null +++ b/soda-tests/tests/integration/test_conformance_identifiers.py @@ -0,0 +1,356 @@ +""" +Adapter Conformance Tests: Identifier Quoting + +Validates that every adapter correctly quotes identifiers containing special +characters in both DDL (CREATE TABLE) and DML (SELECT, INSERT) paths. +This is the #1 source of field bugs (~30% of historical fixes). + +These tests go beyond the existing dialect-level tests in test_hyphenated_identifiers.py +by running full end-to-end contract checks — creating tables with problematic column names, +inserting data, and executing checks against them. + +See: projects/enhancements/common_bugs_tests/historical-bug-analysis.md +""" + +import pytest +from helpers.data_source_test_helper import DataSourceTestHelper +from helpers.test_table import TestTableSpecification + + +# --------------------------------------------------------------------------- +# Test tables +# --------------------------------------------------------------------------- + +reserved_words_table = ( + TestTableSpecification.builder() + .table_purpose("conf_reserved_words") + .column_varchar("select") + .column_varchar("table") + .column_varchar("order") + .column_varchar("group") + .column_integer("count") + .rows( + [ + ("a", "t1", "asc", "g1", 1), + ("b", "t2", "desc", "g2", 2), + ("c", "t3", "asc", "g1", 3), + ] + ) + .build() +) + +hyphenated_columns_table = ( + TestTableSpecification.builder() + .table_purpose("conf_hyphenated_cols") + .column_varchar("first-name") + .column_varchar("last-name") + .column_integer("row-id") + .rows( + [ + ("Alice", "Smith", 1), + ("Bob", "Jones", 2), + (None, "Brown", 3), + ] + ) + .build() +) + +mixed_case_table = ( + TestTableSpecification.builder() + .table_purpose("conf_mixed_case") + .column_varchar("FirstName") + .column_varchar("LastName") + .column_integer("AccountBalance") + .rows( + [ + ("Alice", "Smith", 100), + ("Bob", "Jones", 200), + ("Charlie", "Brown", 300), + ] + ) + .build() +) + + +# --------------------------------------------------------------------------- +# Reserved SQL words as column names +# --------------------------------------------------------------------------- + + +def test_reserved_word_columns_row_count(data_source_test_helper: DataSourceTestHelper): + """Table creation and row_count check must work with reserved-word column names.""" + test_table = data_source_test_helper.ensure_test_table(reserved_words_table) + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=""" + checks: + - row_count: + threshold: + must_be: 3 + """, + ) + + +def test_reserved_word_columns_missing_check(data_source_test_helper: DataSourceTestHelper): + """Missing check must work on columns named with SQL reserved words.""" + test_table = data_source_test_helper.ensure_test_table(reserved_words_table) + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=""" + columns: + - name: select + checks: + - missing: + - name: table + checks: + - missing: + - name: order + checks: + - missing: + - name: group + checks: + - missing: + checks: + - row_count: + """, + ) + + +def test_reserved_word_columns_aggregate_check(data_source_test_helper: DataSourceTestHelper): + """Aggregate check (SUM) must work on a column named 'count' (reserved word).""" + test_table = data_source_test_helper.ensure_test_table(reserved_words_table) + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=""" + columns: + - name: count + checks: + - aggregate: + function: sum + threshold: + must_be: 6 + checks: + - row_count: + """, + ) + + +def test_reserved_word_columns_schema_check(data_source_test_helper: DataSourceTestHelper): + """Schema check must discover columns even when they are named with reserved words.""" + test_table = data_source_test_helper.ensure_test_table(reserved_words_table) + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=f""" + checks: + - schema: + allow_extra_columns: true + columns: + - name: select + data_type: {test_table.data_type('select')} + - name: count + data_type: {test_table.data_type('count')} + """, + ) + + +# --------------------------------------------------------------------------- +# Hyphenated column names (end-to-end) +# --------------------------------------------------------------------------- + + +def test_hyphenated_columns_row_count(data_source_test_helper: DataSourceTestHelper): + """Table creation and row_count check with hyphenated column names.""" + test_table = data_source_test_helper.ensure_test_table(hyphenated_columns_table) + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=""" + checks: + - row_count: + threshold: + must_be: 3 + """, + ) + + +def test_hyphenated_columns_missing_detects_null(data_source_test_helper: DataSourceTestHelper): + """Missing check must correctly detect the NULL in 'first-name' column.""" + test_table = data_source_test_helper.ensure_test_table(hyphenated_columns_table) + data_source_test_helper.assert_contract_fail( + test_table=test_table, + contract_yaml_str=""" + columns: + - name: first-name + checks: + - missing: + checks: + - row_count: + """, + ) + + +def test_hyphenated_columns_aggregate(data_source_test_helper: DataSourceTestHelper): + """Aggregate check on a hyphenated integer column.""" + test_table = data_source_test_helper.ensure_test_table(hyphenated_columns_table) + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=""" + columns: + - name: row-id + checks: + - aggregate: + function: sum + threshold: + must_be: 6 + checks: + - row_count: + """, + ) + + +def test_hyphenated_columns_schema_check(data_source_test_helper: DataSourceTestHelper): + """Schema check must discover hyphenated column names correctly.""" + test_table = data_source_test_helper.ensure_test_table(hyphenated_columns_table) + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=f""" + checks: + - schema: + columns: + - name: first-name + data_type: {test_table.data_type('first-name')} + - name: last-name + data_type: {test_table.data_type('last-name')} + - name: row-id + data_type: {test_table.data_type('row-id')} + """, + ) + + +# --------------------------------------------------------------------------- +# Mixed-case (CamelCase) column names +# --------------------------------------------------------------------------- + + +def test_mixed_case_columns_row_count(data_source_test_helper: DataSourceTestHelper): + """Row count check with CamelCase column names.""" + if not data_source_test_helper.data_source_impl.sql_dialect.supports_case_sensitive_column_names(): + pytest.skip("Case sensitive column names not supported") + test_table = data_source_test_helper.ensure_test_table(mixed_case_table) + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=""" + checks: + - row_count: + threshold: + must_be: 3 + """, + ) + + +def test_mixed_case_columns_missing_check(data_source_test_helper: DataSourceTestHelper): + """Missing check referencing CamelCase column names.""" + if not data_source_test_helper.data_source_impl.sql_dialect.supports_case_sensitive_column_names(): + pytest.skip("Case sensitive column names not supported") + test_table = data_source_test_helper.ensure_test_table(mixed_case_table) + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=""" + columns: + - name: FirstName + checks: + - missing: + - name: LastName + checks: + - missing: + checks: + - row_count: + """, + ) + + +def test_mixed_case_columns_aggregate(data_source_test_helper: DataSourceTestHelper): + """Aggregate check on a CamelCase integer column.""" + if not data_source_test_helper.data_source_impl.sql_dialect.supports_case_sensitive_column_names(): + pytest.skip("Case sensitive column names not supported") + test_table = data_source_test_helper.ensure_test_table(mixed_case_table) + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=""" + columns: + - name: AccountBalance + checks: + - aggregate: + function: avg + threshold: + must_be: 200 + checks: + - row_count: + """, + ) + + +def test_mixed_case_columns_schema_preserves_case(data_source_test_helper: DataSourceTestHelper): + """Schema check must preserve CamelCase column names.""" + if not data_source_test_helper.data_source_impl.sql_dialect.supports_case_sensitive_column_names(): + pytest.skip("Case sensitive column names not supported") + test_table = data_source_test_helper.ensure_test_table(mixed_case_table) + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=f""" + checks: + - schema: + columns: + - name: FirstName + data_type: {test_table.data_type('FirstName')} + - name: LastName + data_type: {test_table.data_type('LastName')} + - name: AccountBalance + data_type: {test_table.data_type('AccountBalance')} + """, + ) + + +# --------------------------------------------------------------------------- +# Parametrized quoting consistency across special identifier patterns +# --------------------------------------------------------------------------- + +SPECIAL_IDENTIFIERS = [ + "my-table", + "col with spaces", + "123_starts_digit", + "SELECT", +] + + +@pytest.mark.parametrize("identifier", SPECIAL_IDENTIFIERS) +def test_quote_default_handles_special_identifiers( + identifier: str, data_source_test_helper: DataSourceTestHelper +): + """quote_default must return a quoted, non-None identifier for each special pattern.""" + sql_dialect = data_source_test_helper.data_source_impl.sql_dialect + quoted = sql_dialect.quote_default(identifier) + assert quoted is not None, f"quote_default returned None for '{identifier}'" + assert quoted != identifier, f"quote_default returned bare identifier for '{identifier}'" + + +@pytest.mark.parametrize("identifier", SPECIAL_IDENTIFIERS) +def test_quote_for_ddl_handles_special_identifiers( + identifier: str, data_source_test_helper: DataSourceTestHelper +): + """quote_for_ddl must return a quoted, non-None identifier for each special pattern.""" + sql_dialect = data_source_test_helper.data_source_impl.sql_dialect + quoted = sql_dialect.quote_for_ddl(identifier) + assert quoted is not None, f"quote_for_ddl returned None for '{identifier}'" + assert quoted != identifier, f"quote_for_ddl returned bare identifier for '{identifier}'" + + +@pytest.mark.parametrize("identifier", SPECIAL_IDENTIFIERS) +def test_ddl_and_dml_quoting_both_preserve_identifier( + identifier: str, data_source_test_helper: DataSourceTestHelper +): + """Both DDL and DML quoting must preserve the original identifier string.""" + sql_dialect = data_source_test_helper.data_source_impl.sql_dialect + dml_quoted = sql_dialect.quote_default(identifier) + ddl_quoted = sql_dialect.quote_for_ddl(identifier) + assert identifier in dml_quoted, f"DML quoting lost identifier: {dml_quoted}" + assert identifier in ddl_quoted, f"DDL quoting lost identifier: {ddl_quoted}" diff --git a/soda-tests/tests/integration/test_conformance_types_dialect.py b/soda-tests/tests/integration/test_conformance_types_dialect.py new file mode 100644 index 000000000..e365a0d80 --- /dev/null +++ b/soda-tests/tests/integration/test_conformance_types_dialect.py @@ -0,0 +1,389 @@ +""" +Adapter Conformance Tests: Type Mapping & SQL Dialect + +Validates that every adapter: +- Can create tables, insert data, and run checks for ALL Soda data types +- Generates valid sampling SQL for each supported sampler type +- Generates valid regex SQL +- Generates valid RANDOM() SQL +- Has consistent type synonym definitions + +These cover the #3-#4 sources of field bugs (type mapping ~10%, SQL dialect ~8%). + +See: projects/enhancements/common_bugs_tests/conformance-test-dev-plan.md (Phase 3) +""" + +import datetime + +import pytest +from helpers.data_source_test_helper import DataSourceTestHelper +from helpers.test_table import TestTableSpecification +from soda_core.common.data_source_impl import DataSourceImpl +from soda_core.common.data_source_results import QueryResult +from soda_core.common.metadata_types import SamplerType, SodaDataTypeName +from soda_core.common.sql_ast import COLUMN, FROM, RANDOM, REGEX_LIKE, SELECT, STAR +from soda_core.common.sql_dialect import SqlDialect + + +# --------------------------------------------------------------------------- +# Test table: one column per Soda data type, with actual data +# --------------------------------------------------------------------------- + +all_types_with_data_table = ( + TestTableSpecification.builder() + .table_purpose("conf_types_e2e") + .column_char("col_char") + .column_varchar("col_varchar") + .column_text("col_text") + .column_smallint("col_smallint") + .column_integer("col_integer") + .column_bigint("col_bigint") + .column_numeric("col_numeric") + .column_decimal("col_decimal") + .column_float("col_float") + .column_double("col_double") + .column_boolean("col_boolean") + .column_date("col_date") + .column_time("col_time") + .column_timestamp("col_timestamp") + .column_timestamp_tz("col_timestamp_tz") + .rows( + [ + ( + "a", # char + "hello", # varchar + "some text", # text + 1, # smallint + 42, # integer + 1000000, # bigint + 3.14, # numeric + 2.718, # decimal + 1.5, # float + 2.71828, # double + True, # boolean + datetime.date(2025, 6, 15), # date + datetime.time(10, 30, 0), # time + datetime.datetime(2025, 6, 15, 10, 30, 0), # timestamp + datetime.datetime(2025, 6, 15, 10, 30, 0), # timestamp_tz + ), + ( + "b", + "world", + "more text", + 2, + 99, + 2000000, + 6.28, + 5.436, + 2.5, + 3.14159, + False, + datetime.date(2025, 7, 20), + datetime.time(14, 0, 0), + datetime.datetime(2025, 7, 20, 14, 0, 0), + datetime.datetime(2025, 7, 20, 14, 0, 0), + ), + ( + None, # null char + None, # null varchar + None, # null text + None, # null smallint + None, # null integer + None, # null bigint + None, # null numeric + None, # null decimal + None, # null float + None, # null double + None, # null boolean + None, # null date + None, # null time + None, # null timestamp + None, # null timestamp_tz + ), + ] + ) + .build() +) + +# Columns to test with missing check (all of them) +ALL_TYPE_COLUMNS = [ + "col_char", + "col_varchar", + "col_text", + "col_smallint", + "col_integer", + "col_bigint", + "col_numeric", + "col_decimal", + "col_float", + "col_double", + "col_boolean", + "col_date", + "col_time", + "col_timestamp", + "col_timestamp_tz", +] + +# Numeric columns to test with aggregate checks +NUMERIC_COLUMNS = [ + "col_smallint", + "col_integer", + "col_bigint", + "col_numeric", + "col_decimal", + "col_float", + "col_double", +] + + +# --------------------------------------------------------------------------- +# End-to-end type tests: full pipeline for every data type +# --------------------------------------------------------------------------- + + +def test_all_types_table_creation_and_row_count(data_source_test_helper: DataSourceTestHelper): + """Create a table with all Soda data types, insert data, verify row count. + This exercises the full DDL + INSERT pipeline for every type.""" + test_table = data_source_test_helper.ensure_test_table(all_types_with_data_table) + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=""" + checks: + - row_count: + threshold: + must_be: 3 + """, + ) + + +@pytest.mark.parametrize("column_name", ALL_TYPE_COLUMNS) +def test_missing_check_per_type(column_name: str, data_source_test_helper: DataSourceTestHelper): + """Missing check must detect the NULL row for each data type. + This verifies the full pipeline: type mapping → SQL generation → query → result parsing.""" + test_table = data_source_test_helper.ensure_test_table(all_types_with_data_table) + data_source_test_helper.assert_contract_fail( + test_table=test_table, + contract_yaml_str=f""" + columns: + - name: {column_name} + checks: + - missing: + checks: + - row_count: + """, + ) + + +@pytest.mark.parametrize("column_name", NUMERIC_COLUMNS) +def test_aggregate_check_per_numeric_type(column_name: str, data_source_test_helper: DataSourceTestHelper): + """Aggregate (avg) must work on every numeric type. Verifies type casting and aggregation.""" + test_table = data_source_test_helper.ensure_test_table(all_types_with_data_table) + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=f""" + columns: + - name: {column_name} + checks: + - aggregate: + function: avg + threshold: + must_be_greater_than: 0 + checks: + - row_count: + """, + ) + + +def test_schema_check_all_types(data_source_test_helper: DataSourceTestHelper): + """Schema check must discover all columns in the correct order. + Note: we don't compare data_type here because forward-mapped names may differ + from discovered names (e.g., Postgres maps FLOAT→'float' but discovers 'double precision'). + Type round-trip accuracy is tested in Phase 2 (test_conformance_discovery.py).""" + test_table = data_source_test_helper.ensure_test_table(all_types_with_data_table) + + columns_yaml = "\n".join( + f" - name: {col}" + for col in ALL_TYPE_COLUMNS + ) + + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=f""" + checks: + - schema: + columns: +{columns_yaml} + """, + ) + + +# --------------------------------------------------------------------------- +# Sampling SQL conformance +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("sampler_type", list(SamplerType)) +def test_sampling_sql_generation(sampler_type: SamplerType, data_source_test_helper: DataSourceTestHelper): + """For each sampler type the adapter claims to support, the generated SQL must + be non-empty and parseable (used in a SELECT ... FROM table SAMPLE clause).""" + sql_dialect: SqlDialect = data_source_test_helper.data_source_impl.sql_dialect + + if not sql_dialect.supports_sampler(sampler_type): + pytest.skip(f"{sql_dialect.__class__.__name__} does not support {sampler_type.name}") + + sample_size = 10 if sampler_type == SamplerType.PERCENTAGE else 50 + sample_sql = sql_dialect._build_sample_sql(sampler_type, sample_size) + + assert sample_sql is not None, f"_build_sample_sql returned None for {sampler_type.name}" + assert len(sample_sql.strip()) > 0, f"_build_sample_sql returned empty string for {sampler_type.name}" + assert str(sample_size) in sample_sql, ( + f"Sample size {sample_size} not found in generated SQL: {sample_sql}" + ) + + +@pytest.mark.parametrize("sampler_type", list(SamplerType)) +def test_sampling_sql_executes(sampler_type: SamplerType, data_source_test_helper: DataSourceTestHelper): + """For each supported sampler type, generate a full SELECT with sampling and execute it.""" + sql_dialect: SqlDialect = data_source_test_helper.data_source_impl.sql_dialect + + if not sql_dialect.supports_sampler(sampler_type): + pytest.skip(f"{sql_dialect.__class__.__name__} does not support {sampler_type.name}") + + test_table = data_source_test_helper.ensure_test_table(all_types_with_data_table) + table_from_name = sql_dialect.get_from_name_from_qualified_name(test_table.qualified_name) + + sample_size = 50 if sampler_type == SamplerType.PERCENTAGE else 2 + select_sql = sql_dialect.build_select_sql([ + SELECT(STAR()), + FROM(table_from_name).SAMPLE(sampler_type, sample_size), + ]) + + result: QueryResult = data_source_test_helper.data_source_impl.execute_query(select_sql) + assert result is not None, "Sampled query returned None" + assert len(result.rows) >= 0, "Sampled query returned negative row count" + + +# --------------------------------------------------------------------------- +# Regex SQL conformance +# --------------------------------------------------------------------------- + + +def test_regex_sql_generation(data_source_test_helper: DataSourceTestHelper): + """The adapter must generate valid regex SQL from a REGEX_LIKE expression.""" + sql_dialect: SqlDialect = data_source_test_helper.data_source_impl.sql_dialect + + regex_expr = REGEX_LIKE(expression=COLUMN("col_varchar"), regex_pattern="^[a-z]+$") + sql = sql_dialect._build_regex_like_sql(regex_expr) + + assert sql is not None, "regex SQL is None" + assert len(sql.strip()) > 0, "regex SQL is empty" + assert "col_varchar" in sql, f"Column name missing from regex SQL: {sql}" + + +def test_regex_via_invalid_check(data_source_test_helper: DataSourceTestHelper): + """Invalid check with regex must work end-to-end (the row with NULL is excluded, + the two data rows match the pattern, so no invalids among non-null rows).""" + test_table = data_source_test_helper.ensure_test_table(all_types_with_data_table) + + if data_source_test_helper.data_source_impl.sql_dialect.supports_regex_advanced(): + regex = "^[a-z]+$" + else: + regex = "[a-z]" + + data_source_test_helper.assert_contract_pass( + test_table=test_table, + contract_yaml_str=f""" + columns: + - name: col_varchar + valid_format: + regex: '{regex}' + name: lowercase-letters + checks: + - invalid: + checks: + - row_count: + """, + ) + + +# --------------------------------------------------------------------------- +# RANDOM() conformance +# --------------------------------------------------------------------------- + + +def test_random_generates_valid_sql(data_source_test_helper: DataSourceTestHelper): + """RANDOM() must generate valid SQL that returns values in [0.0, 1.0).""" + test_table = data_source_test_helper.ensure_test_table(all_types_with_data_table) + data_source_impl: DataSourceImpl = data_source_test_helper.data_source_impl + sql_dialect: SqlDialect = data_source_impl.sql_dialect + + table_from_name = sql_dialect.get_from_name_from_qualified_name(test_table.qualified_name) + select_sql = sql_dialect.build_select_sql([ + SELECT(RANDOM()), + FROM(table_from_name), + ]) + + result: QueryResult = data_source_impl.execute_query(select_sql) + assert len(result.rows) == 3 + + for row in result.rows: + value = float(row[0]) + assert 0.0 <= value < 1.0, f"RANDOM() returned {value}, expected [0.0, 1.0)" + + +# --------------------------------------------------------------------------- +# Type mapping consistency +# --------------------------------------------------------------------------- + + +def test_forward_mapping_covers_all_types(data_source_test_helper: DataSourceTestHelper): + """Every SodaDataTypeName must have a data source type in the forward mapping.""" + forward_map = ( + data_source_test_helper.data_source_impl.sql_dialect + .get_data_source_data_type_name_by_soda_data_type_names() + ) + unmapped = [t.name for t in SodaDataTypeName if t not in forward_map] + assert unmapped == [], f"SodaDataTypeNames missing from forward mapping: {unmapped}" + + +def test_reverse_mapping_covers_forward(data_source_test_helper: DataSourceTestHelper): + """Every type produced by forward mapping must be resolvable via reverse mapping.""" + sql_dialect = data_source_test_helper.data_source_impl.sql_dialect + forward = sql_dialect.get_data_source_data_type_name_by_soda_data_type_names() + reverse = sql_dialect.get_soda_data_type_name_by_data_source_data_type_names() + + broken = [] + for soda_type, ds_type in forward.items(): + found = ( + ds_type in reverse + or (isinstance(ds_type, str) and ds_type.lower() in reverse) + or sql_dialect._data_type_name_synonym_mappings.get( + ds_type.lower() if isinstance(ds_type, str) else ds_type, None + ) + in reverse + ) + if not found: + broken.append(f"{soda_type.name} → '{ds_type}'") + + assert broken == [], f"Forward-mapped types with no reverse path:\n" + "\n".join(broken) + + +def test_data_type_synonyms_internally_consistent(data_source_test_helper: DataSourceTestHelper): + """All entries in a synonym group must resolve to the same canonical name + through the synonym mapping (the _data_type_name_synonym_mappings dict).""" + sql_dialect = data_source_test_helper.data_source_impl.sql_dialect + synonym_lists = sql_dialect._get_data_type_name_synonyms() + + inconsistencies = [] + for group in synonym_lists: + canonicals = set() + for name in group: + canonical = sql_dialect._data_type_name_synonym_mappings.get(name.lower()) + if canonical is not None: + canonicals.add(canonical) + if len(canonicals) > 1: + inconsistencies.append(f"Group {group} maps to multiple canonicals: {canonicals}") + + assert inconsistencies == [], ( + "Synonym groups with inconsistent canonical mappings:\n" + "\n".join(inconsistencies) + ) From 0de0c275ff0cbbd8b7274a789160007debfdbf83 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 14 Apr 2026 16:13:25 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../integration/test_conformance_discovery.py | 99 ++++++++----------- .../test_conformance_identifiers.py | 13 +-- .../test_conformance_types_dialect.py | 97 +++++++++--------- 3 files changed, 89 insertions(+), 120 deletions(-) diff --git a/soda-tests/tests/integration/test_conformance_discovery.py b/soda-tests/tests/integration/test_conformance_discovery.py index 790912b27..939b76eca 100644 --- a/soda-tests/tests/integration/test_conformance_discovery.py +++ b/soda-tests/tests/integration/test_conformance_discovery.py @@ -15,19 +15,13 @@ import pytest from helpers.data_source_test_helper import DataSourceTestHelper from helpers.test_table import TestTableSpecification -from soda_core.common.metadata_types import ( - ColumnMetadata, - SodaDataTypeName, - SqlDataType, -) +from soda_core.common.metadata_types import ColumnMetadata, SodaDataTypeName from soda_core.common.sql_dialect import SqlDialect -from soda_core.common.statements.metadata_tables_query import TableType from soda_core.common.statements.table_types import ( FullyQualifiedTableName, FullyQualifiedViewName, ) - # --------------------------------------------------------------------------- # Test tables # --------------------------------------------------------------------------- @@ -111,9 +105,7 @@ def test_discovery_excludes_soda_internal_tables(data_source_test_helper: DataSo if name and name.lower().startswith("__soda"): internal_tables.append(name) - assert internal_tables == [], ( - f"Internal Soda tables leaked into discovery results: {internal_tables}" - ) + assert internal_tables == [], f"Internal Soda tables leaked into discovery results: {internal_tables}" def test_discovery_finds_test_table(data_source_test_helper: DataSourceTestHelper): @@ -127,14 +119,10 @@ def test_discovery_finds_test_table(data_source_test_helper: DataSourceTestHelpe include_table_name_like_filters=[f"{test_table.unique_name}"], ) - table_names = [ - entry.table_name.lower() - for entry in results - if isinstance(entry, FullyQualifiedTableName) - ] - assert test_table.unique_name.lower() in table_names, ( - f"Test table {test_table.unique_name} not found in discovery. Found: {table_names}" - ) + table_names = [entry.table_name.lower() for entry in results if isinstance(entry, FullyQualifiedTableName)] + assert ( + test_table.unique_name.lower() in table_names + ), f"Test table {test_table.unique_name} not found in discovery. Found: {table_names}" # --------------------------------------------------------------------------- @@ -227,9 +215,9 @@ def test_all_types_round_trip(data_source_test_helper: DataSourceTestHelper): dataset_name=test_table.unique_name, ) - assert len(actual_columns) == len(EXPECTED_TYPE_MAP), ( - f"Column count mismatch: expected {len(EXPECTED_TYPE_MAP)}, got {len(actual_columns)}" - ) + assert len(actual_columns) == len( + EXPECTED_TYPE_MAP + ), f"Column count mismatch: expected {len(EXPECTED_TYPE_MAP)}, got {len(actual_columns)}" reverse_map = sql_dialect.get_soda_data_type_name_by_data_source_data_type_names() @@ -240,9 +228,9 @@ def test_all_types_round_trip(data_source_test_helper: DataSourceTestHelper): ds_type_name = col.sql_data_type.name actual_soda_type = reverse_map.get(ds_type_name) - assert actual_soda_type is not None, ( - f"Column '{col_name}': data source type '{ds_type_name}' has no reverse mapping" - ) + assert ( + actual_soda_type is not None + ), f"Column '{col_name}': data source type '{ds_type_name}' has no reverse mapping" assert sql_dialect.is_same_soda_data_type_with_synonyms(expected_soda_type, actual_soda_type), ( f"Column '{col_name}': expected SodaDataType {expected_soda_type}, " f"got {actual_soda_type} (from DS type '{ds_type_name}')" @@ -288,9 +276,7 @@ def test_type_synonyms_are_bidirectional(data_source_test_helper: DataSourceTest f"but others map to {canonical}" ) - assert mismatches == [], ( - f"Type synonym bidirectionality broken:\n" + "\n".join(mismatches) - ) + assert mismatches == [], f"Type synonym bidirectionality broken:\n" + "\n".join(mismatches) # --------------------------------------------------------------------------- @@ -316,63 +302,63 @@ def test_column_type_parameters_preserved(data_source_test_helper: DataSourceTes varchar_col = cols_by_name.get("varchar_100") assert varchar_col is not None, "Column varchar_100 not found" if varchar_col.sql_data_type.character_maximum_length is not None: - assert varchar_col.sql_data_type.character_maximum_length == 100, ( - f"varchar_100: expected length 100, got {varchar_col.sql_data_type.character_maximum_length}" - ) + assert ( + varchar_col.sql_data_type.character_maximum_length == 100 + ), f"varchar_100: expected length 100, got {varchar_col.sql_data_type.character_maximum_length}" char_col = cols_by_name.get("char_10") assert char_col is not None, "Column char_10 not found" if char_col.sql_data_type.character_maximum_length is not None: - assert char_col.sql_data_type.character_maximum_length == 10, ( - f"char_10: expected length 10, got {char_col.sql_data_type.character_maximum_length}" - ) + assert ( + char_col.sql_data_type.character_maximum_length == 10 + ), f"char_10: expected length 10, got {char_col.sql_data_type.character_maximum_length}" # numeric_precision and numeric_scale if sql_dialect.supports_data_type_numeric_precision(): numeric_col = cols_by_name.get("numeric_18_4") assert numeric_col is not None, "Column numeric_18_4 not found" if numeric_col.sql_data_type.numeric_precision is not None: - assert numeric_col.sql_data_type.numeric_precision == 18, ( - f"numeric_18_4: expected precision 18, got {numeric_col.sql_data_type.numeric_precision}" - ) + assert ( + numeric_col.sql_data_type.numeric_precision == 18 + ), f"numeric_18_4: expected precision 18, got {numeric_col.sql_data_type.numeric_precision}" decimal_col = cols_by_name.get("decimal_10_2") assert decimal_col is not None, "Column decimal_10_2 not found" if decimal_col.sql_data_type.numeric_precision is not None: - assert decimal_col.sql_data_type.numeric_precision == 10, ( - f"decimal_10_2: expected precision 10, got {decimal_col.sql_data_type.numeric_precision}" - ) + assert ( + decimal_col.sql_data_type.numeric_precision == 10 + ), f"decimal_10_2: expected precision 10, got {decimal_col.sql_data_type.numeric_precision}" if sql_dialect.supports_data_type_numeric_scale(): numeric_col = cols_by_name.get("numeric_18_4") assert numeric_col is not None, "Column numeric_18_4 not found" if numeric_col.sql_data_type.numeric_scale is not None: - assert numeric_col.sql_data_type.numeric_scale == 4, ( - f"numeric_18_4: expected scale 4, got {numeric_col.sql_data_type.numeric_scale}" - ) + assert ( + numeric_col.sql_data_type.numeric_scale == 4 + ), f"numeric_18_4: expected scale 4, got {numeric_col.sql_data_type.numeric_scale}" decimal_col = cols_by_name.get("decimal_10_2") assert decimal_col is not None, "Column decimal_10_2 not found" if decimal_col.sql_data_type.numeric_scale is not None: - assert decimal_col.sql_data_type.numeric_scale == 2, ( - f"decimal_10_2: expected scale 2, got {decimal_col.sql_data_type.numeric_scale}" - ) + assert ( + decimal_col.sql_data_type.numeric_scale == 2 + ), f"decimal_10_2: expected scale 2, got {decimal_col.sql_data_type.numeric_scale}" # datetime_precision if sql_dialect.supports_data_type_datetime_precision(): ts_col = cols_by_name.get("ts_precision_3") assert ts_col is not None, "Column ts_precision_3 not found" if ts_col.sql_data_type.datetime_precision is not None: - assert ts_col.sql_data_type.datetime_precision == 3, ( - f"ts_precision_3: expected datetime_precision 3, got {ts_col.sql_data_type.datetime_precision}" - ) + assert ( + ts_col.sql_data_type.datetime_precision == 3 + ), f"ts_precision_3: expected datetime_precision 3, got {ts_col.sql_data_type.datetime_precision}" ts_tz_col = cols_by_name.get("ts_tz_precision_6") assert ts_tz_col is not None, "Column ts_tz_precision_6 not found" if ts_tz_col.sql_data_type.datetime_precision is not None: - assert ts_tz_col.sql_data_type.datetime_precision == 6, ( - f"ts_tz_precision_6: expected datetime_precision 6, got {ts_tz_col.sql_data_type.datetime_precision}" - ) + assert ( + ts_tz_col.sql_data_type.datetime_precision == 6 + ), f"ts_tz_precision_6: expected datetime_precision 6, got {ts_tz_col.sql_data_type.datetime_precision}" # --------------------------------------------------------------------------- @@ -383,8 +369,7 @@ def test_column_type_parameters_preserved(data_source_test_helper: DataSourceTes def test_every_soda_type_has_forward_mapping(data_source_test_helper: DataSourceTestHelper): """Every SodaDataTypeName must have a forward mapping (Soda→data source).""" forward_map = ( - data_source_test_helper.data_source_impl.sql_dialect - .get_data_source_data_type_name_by_soda_data_type_names() + data_source_test_helper.data_source_impl.sql_dialect.get_data_source_data_type_name_by_soda_data_type_names() ) unmapped = [str(t) for t in SodaDataTypeName if t not in forward_map] assert unmapped == [], f"SodaDataTypeNames with no forward mapping: {unmapped}" @@ -401,12 +386,8 @@ def test_every_forward_mapped_type_has_reverse(data_source_test_helper: DataSour ds_type_lower = ds_type.lower() if isinstance(ds_type, str) else ds_type if ds_type not in reverse_map and ds_type_lower not in reverse_map: # Check synonyms - canonical = sql_dialect._data_type_name_synonym_mappings.get( - ds_type_lower, ds_type_lower - ) + canonical = sql_dialect._data_type_name_synonym_mappings.get(ds_type_lower, ds_type_lower) if canonical not in reverse_map: unmapped.append(f"{soda_type} → '{ds_type}' (no reverse)") - assert unmapped == [], ( - f"Forward-mapped types with no reverse mapping:\n" + "\n".join(unmapped) - ) + assert unmapped == [], f"Forward-mapped types with no reverse mapping:\n" + "\n".join(unmapped) diff --git a/soda-tests/tests/integration/test_conformance_identifiers.py b/soda-tests/tests/integration/test_conformance_identifiers.py index 5f346deb3..014a75e2f 100644 --- a/soda-tests/tests/integration/test_conformance_identifiers.py +++ b/soda-tests/tests/integration/test_conformance_identifiers.py @@ -16,7 +16,6 @@ from helpers.data_source_test_helper import DataSourceTestHelper from helpers.test_table import TestTableSpecification - # --------------------------------------------------------------------------- # Test tables # --------------------------------------------------------------------------- @@ -323,9 +322,7 @@ def test_mixed_case_columns_schema_preserves_case(data_source_test_helper: DataS @pytest.mark.parametrize("identifier", SPECIAL_IDENTIFIERS) -def test_quote_default_handles_special_identifiers( - identifier: str, data_source_test_helper: DataSourceTestHelper -): +def test_quote_default_handles_special_identifiers(identifier: str, data_source_test_helper: DataSourceTestHelper): """quote_default must return a quoted, non-None identifier for each special pattern.""" sql_dialect = data_source_test_helper.data_source_impl.sql_dialect quoted = sql_dialect.quote_default(identifier) @@ -334,9 +331,7 @@ def test_quote_default_handles_special_identifiers( @pytest.mark.parametrize("identifier", SPECIAL_IDENTIFIERS) -def test_quote_for_ddl_handles_special_identifiers( - identifier: str, data_source_test_helper: DataSourceTestHelper -): +def test_quote_for_ddl_handles_special_identifiers(identifier: str, data_source_test_helper: DataSourceTestHelper): """quote_for_ddl must return a quoted, non-None identifier for each special pattern.""" sql_dialect = data_source_test_helper.data_source_impl.sql_dialect quoted = sql_dialect.quote_for_ddl(identifier) @@ -345,9 +340,7 @@ def test_quote_for_ddl_handles_special_identifiers( @pytest.mark.parametrize("identifier", SPECIAL_IDENTIFIERS) -def test_ddl_and_dml_quoting_both_preserve_identifier( - identifier: str, data_source_test_helper: DataSourceTestHelper -): +def test_ddl_and_dml_quoting_both_preserve_identifier(identifier: str, data_source_test_helper: DataSourceTestHelper): """Both DDL and DML quoting must preserve the original identifier string.""" sql_dialect = data_source_test_helper.data_source_impl.sql_dialect dml_quoted = sql_dialect.quote_default(identifier) diff --git a/soda-tests/tests/integration/test_conformance_types_dialect.py b/soda-tests/tests/integration/test_conformance_types_dialect.py index e365a0d80..d74f4bc35 100644 --- a/soda-tests/tests/integration/test_conformance_types_dialect.py +++ b/soda-tests/tests/integration/test_conformance_types_dialect.py @@ -24,7 +24,6 @@ from soda_core.common.sql_ast import COLUMN, FROM, RANDOM, REGEX_LIKE, SELECT, STAR from soda_core.common.sql_dialect import SqlDialect - # --------------------------------------------------------------------------- # Test table: one column per Soda data type, with actual data # --------------------------------------------------------------------------- @@ -50,21 +49,21 @@ .rows( [ ( - "a", # char - "hello", # varchar - "some text", # text - 1, # smallint - 42, # integer - 1000000, # bigint - 3.14, # numeric - 2.718, # decimal - 1.5, # float - 2.71828, # double - True, # boolean - datetime.date(2025, 6, 15), # date - datetime.time(10, 30, 0), # time - datetime.datetime(2025, 6, 15, 10, 30, 0), # timestamp - datetime.datetime(2025, 6, 15, 10, 30, 0), # timestamp_tz + "a", # char + "hello", # varchar + "some text", # text + 1, # smallint + 42, # integer + 1000000, # bigint + 3.14, # numeric + 2.718, # decimal + 1.5, # float + 2.71828, # double + True, # boolean + datetime.date(2025, 6, 15), # date + datetime.time(10, 30, 0), # time + datetime.datetime(2025, 6, 15, 10, 30, 0), # timestamp + datetime.datetime(2025, 6, 15, 10, 30, 0), # timestamp_tz ), ( "b", @@ -84,21 +83,21 @@ datetime.datetime(2025, 7, 20, 14, 0, 0), ), ( - None, # null char - None, # null varchar - None, # null text - None, # null smallint - None, # null integer - None, # null bigint - None, # null numeric - None, # null decimal - None, # null float - None, # null double - None, # null boolean - None, # null date - None, # null time - None, # null timestamp - None, # null timestamp_tz + None, # null char + None, # null varchar + None, # null text + None, # null smallint + None, # null integer + None, # null bigint + None, # null numeric + None, # null decimal + None, # null float + None, # null double + None, # null boolean + None, # null date + None, # null time + None, # null timestamp + None, # null timestamp_tz ), ] ) @@ -201,10 +200,7 @@ def test_schema_check_all_types(data_source_test_helper: DataSourceTestHelper): Type round-trip accuracy is tested in Phase 2 (test_conformance_discovery.py).""" test_table = data_source_test_helper.ensure_test_table(all_types_with_data_table) - columns_yaml = "\n".join( - f" - name: {col}" - for col in ALL_TYPE_COLUMNS - ) + columns_yaml = "\n".join(f" - name: {col}" for col in ALL_TYPE_COLUMNS) data_source_test_helper.assert_contract_pass( test_table=test_table, @@ -236,9 +232,7 @@ def test_sampling_sql_generation(sampler_type: SamplerType, data_source_test_hel assert sample_sql is not None, f"_build_sample_sql returned None for {sampler_type.name}" assert len(sample_sql.strip()) > 0, f"_build_sample_sql returned empty string for {sampler_type.name}" - assert str(sample_size) in sample_sql, ( - f"Sample size {sample_size} not found in generated SQL: {sample_sql}" - ) + assert str(sample_size) in sample_sql, f"Sample size {sample_size} not found in generated SQL: {sample_sql}" @pytest.mark.parametrize("sampler_type", list(SamplerType)) @@ -253,10 +247,12 @@ def test_sampling_sql_executes(sampler_type: SamplerType, data_source_test_helpe table_from_name = sql_dialect.get_from_name_from_qualified_name(test_table.qualified_name) sample_size = 50 if sampler_type == SamplerType.PERCENTAGE else 2 - select_sql = sql_dialect.build_select_sql([ - SELECT(STAR()), - FROM(table_from_name).SAMPLE(sampler_type, sample_size), - ]) + select_sql = sql_dialect.build_select_sql( + [ + SELECT(STAR()), + FROM(table_from_name).SAMPLE(sampler_type, sample_size), + ] + ) result: QueryResult = data_source_test_helper.data_source_impl.execute_query(select_sql) assert result is not None, "Sampled query returned None" @@ -318,10 +314,12 @@ def test_random_generates_valid_sql(data_source_test_helper: DataSourceTestHelpe sql_dialect: SqlDialect = data_source_impl.sql_dialect table_from_name = sql_dialect.get_from_name_from_qualified_name(test_table.qualified_name) - select_sql = sql_dialect.build_select_sql([ - SELECT(RANDOM()), - FROM(table_from_name), - ]) + select_sql = sql_dialect.build_select_sql( + [ + SELECT(RANDOM()), + FROM(table_from_name), + ] + ) result: QueryResult = data_source_impl.execute_query(select_sql) assert len(result.rows) == 3 @@ -339,8 +337,7 @@ def test_random_generates_valid_sql(data_source_test_helper: DataSourceTestHelpe def test_forward_mapping_covers_all_types(data_source_test_helper: DataSourceTestHelper): """Every SodaDataTypeName must have a data source type in the forward mapping.""" forward_map = ( - data_source_test_helper.data_source_impl.sql_dialect - .get_data_source_data_type_name_by_soda_data_type_names() + data_source_test_helper.data_source_impl.sql_dialect.get_data_source_data_type_name_by_soda_data_type_names() ) unmapped = [t.name for t in SodaDataTypeName if t not in forward_map] assert unmapped == [], f"SodaDataTypeNames missing from forward mapping: {unmapped}" @@ -384,6 +381,4 @@ def test_data_type_synonyms_internally_consistent(data_source_test_helper: DataS if len(canonicals) > 1: inconsistencies.append(f"Group {group} maps to multiple canonicals: {canonicals}") - assert inconsistencies == [], ( - "Synonym groups with inconsistent canonical mappings:\n" + "\n".join(inconsistencies) - ) + assert inconsistencies == [], "Synonym groups with inconsistent canonical mappings:\n" + "\n".join(inconsistencies) From 342fe59a923c69b5d95bede5c3a0464edf1b6956 Mon Sep 17 00:00:00 2001 From: Paul Teehan Date: Wed, 29 Apr 2026 13:31:54 +0200 Subject: [PATCH 3/4] Fix CI failures and review issues for conformance test suite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AI-written. Adapter source fixes (real bugs surfaced by the synonym bidirectionality test): - soda-redshift: move "float" out of the FLOAT synonym group and into the DOUBLE group. In Redshift, FLOAT is an alias for FLOAT8 / DOUBLE PRECISION, not for REAL / FLOAT4. The reverse mapping already had this right (float -> DOUBLE); the synonym group was inconsistent. - soda-duckdb: split the catch-all integer/decimal group and the catch-all single+double precision float group into proper per-type synonym groups. Drop type names that don't exist in DuckDB (number, byteint, timestamp_ntz, timestamp_ltz, timestamp_tz). Test fixes: - test_conformance_discovery: relax datetime_precision check from == N to >= N. Trino connectors (e.g. iceberg) normalize datetime precision to a connector-specific default (often 6) regardless of DDL, which still satisfies the contract. Drop unused test_table local in the internal-table filter test. Remove unused f-string prefixes and add comments where dialect-internal accessors are needed. - test_conformance_identifiers: case-insensitive identifier preservation check; some dialects fold case during quoting. Drop gratuitous "is not None" assertions on values typed as str. - test_conformance_types_dialect: drop two redundant private-API tests (_build_sample_sql, _build_regex_like_sql) — coverage is preserved by the existing end-to-end tests in the same file. Remove gratuitous always-true assertions (assert result is not None on a typed return, assert len(rows) >= 0). Clean up unused imports. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../common/data_sources/duckdb_data_source.py | 17 +++++--- .../data_sources/redshift_data_source.py | 4 +- .../integration/test_conformance_discovery.py | 31 +++++++++----- .../test_conformance_identifiers.py | 16 +++---- .../test_conformance_types_dialect.py | 42 ++++--------------- 5 files changed, 49 insertions(+), 61 deletions(-) diff --git a/soda-duckdb/src/soda_duckdb/common/data_sources/duckdb_data_source.py b/soda-duckdb/src/soda_duckdb/common/data_sources/duckdb_data_source.py index a70c7eaac..d15ac467f 100644 --- a/soda-duckdb/src/soda_duckdb/common/data_sources/duckdb_data_source.py +++ b/soda-duckdb/src/soda_duckdb/common/data_sources/duckdb_data_source.py @@ -128,14 +128,19 @@ def format_metadata_data_type(self, data_type: str) -> str: def _get_data_type_name_synonyms(self) -> list[list[str]]: # Implements data type synonyms - # Each list should represent a list of synonyms + # Each list should represent a list of synonyms — every member of a + # group must reverse-map to the same SodaDataTypeName (or to a Soda + # type pair that is_same_soda_data_type_with_synonyms treats as equal). return [ ["varchar", "text", "string"], - ["number", "decimal", "numeric", "int", "integer", "bigint", "smallint", "tinyint", "byteint"], - ["float", "float4", "float8", "double", "double precision", "real"], - ["timestamp", "datetime", "timestamp_ntz", "timestamp without time zone"], - ["timestamp_ltz", "timestamp with local time zone"], - ["timestamp_tz", "timestamp with time zone"], + ["decimal", "numeric"], + ["smallint", "int2"], + ["integer", "int", "int4"], + ["bigint", "int8"], + ["real", "float4", "float"], + ["float8", "double", "double precision"], + ["timestamp", "datetime", "timestamp without time zone"], + ["timestamptz", "timestamp with time zone"], ] def get_data_source_data_type_name_by_soda_data_type_names(self) -> dict: diff --git a/soda-redshift/src/soda_redshift/common/data_sources/redshift_data_source.py b/soda-redshift/src/soda_redshift/common/data_sources/redshift_data_source.py index e9236bb3b..c34ca9aec 100644 --- a/soda-redshift/src/soda_redshift/common/data_sources/redshift_data_source.py +++ b/soda-redshift/src/soda_redshift/common/data_sources/redshift_data_source.py @@ -135,8 +135,8 @@ def _get_data_type_name_synonyms(self) -> list[list[str]]: ["smallint", "int2"], ["integer", "int", "int4"], ["bigint", "int8"], - ["real", "float4", "float"], - [REDSHIFT_DOUBLE_PRECISION, "float8"], + ["real", "float4"], + [REDSHIFT_DOUBLE_PRECISION, "float8", "float"], ["timestamp", "timestamp without time zone"], ["time", "time without time zone"], ] diff --git a/soda-tests/tests/integration/test_conformance_discovery.py b/soda-tests/tests/integration/test_conformance_discovery.py index 939b76eca..aed2020f0 100644 --- a/soda-tests/tests/integration/test_conformance_discovery.py +++ b/soda-tests/tests/integration/test_conformance_discovery.py @@ -87,7 +87,8 @@ def test_discovery_excludes_soda_internal_tables(data_source_test_helper: DataSo Historical bug: commit a16b99c8 — __soda_temp tables were appearing in discovery results. """ - test_table = data_source_test_helper.ensure_test_table(simple_table) + # Ensure the schema has at least one user table so discovery has something to return. + data_source_test_helper.ensure_test_table(simple_table) metadata_query = data_source_test_helper.data_source_impl.create_metadata_tables_query() results = metadata_query.execute( @@ -250,6 +251,9 @@ def test_type_synonyms_are_bidirectional(data_source_test_helper: DataSourceTest mapping only recognizes the canonical form. """ sql_dialect: SqlDialect = data_source_test_helper.data_source_impl.sql_dialect + # Deliberately reaches into _get_data_type_name_synonyms (dialect-internal) + # because the synonym list is the unique input the test needs and is not + # exposed via any public method. synonym_lists = sql_dialect._get_data_type_name_synonyms() reverse_map = sql_dialect.get_soda_data_type_name_by_data_source_data_type_names() @@ -276,7 +280,7 @@ def test_type_synonyms_are_bidirectional(data_source_test_helper: DataSourceTest f"but others map to {canonical}" ) - assert mismatches == [], f"Type synonym bidirectionality broken:\n" + "\n".join(mismatches) + assert mismatches == [], "Type synonym bidirectionality broken:\n" + "\n".join(mismatches) # --------------------------------------------------------------------------- @@ -344,21 +348,26 @@ def test_column_type_parameters_preserved(data_source_test_helper: DataSourceTes decimal_col.sql_data_type.numeric_scale == 2 ), f"decimal_10_2: expected scale 2, got {decimal_col.sql_data_type.numeric_scale}" - # datetime_precision + # datetime_precision — assert the discovered precision is *at least* the + # requested value. Some adapters (e.g. Trino-iceberg) normalize datetime + # precision to a connector-specific default (often 6) regardless of DDL, + # which still satisfies the contract that precision is preserved or extended. if sql_dialect.supports_data_type_datetime_precision(): ts_col = cols_by_name.get("ts_precision_3") assert ts_col is not None, "Column ts_precision_3 not found" if ts_col.sql_data_type.datetime_precision is not None: - assert ( - ts_col.sql_data_type.datetime_precision == 3 - ), f"ts_precision_3: expected datetime_precision 3, got {ts_col.sql_data_type.datetime_precision}" + assert ts_col.sql_data_type.datetime_precision >= 3, ( + f"ts_precision_3: expected datetime_precision >= 3, " + f"got {ts_col.sql_data_type.datetime_precision}" + ) ts_tz_col = cols_by_name.get("ts_tz_precision_6") assert ts_tz_col is not None, "Column ts_tz_precision_6 not found" if ts_tz_col.sql_data_type.datetime_precision is not None: - assert ( - ts_tz_col.sql_data_type.datetime_precision == 6 - ), f"ts_tz_precision_6: expected datetime_precision 6, got {ts_tz_col.sql_data_type.datetime_precision}" + assert ts_tz_col.sql_data_type.datetime_precision >= 6, ( + f"ts_tz_precision_6: expected datetime_precision >= 6, " + f"got {ts_tz_col.sql_data_type.datetime_precision}" + ) # --------------------------------------------------------------------------- @@ -385,9 +394,9 @@ def test_every_forward_mapped_type_has_reverse(data_source_test_helper: DataSour for soda_type, ds_type in forward_map.items(): ds_type_lower = ds_type.lower() if isinstance(ds_type, str) else ds_type if ds_type not in reverse_map and ds_type_lower not in reverse_map: - # Check synonyms + # Check synonyms (dialect-internal mapping) canonical = sql_dialect._data_type_name_synonym_mappings.get(ds_type_lower, ds_type_lower) if canonical not in reverse_map: unmapped.append(f"{soda_type} → '{ds_type}' (no reverse)") - assert unmapped == [], f"Forward-mapped types with no reverse mapping:\n" + "\n".join(unmapped) + assert unmapped == [], "Forward-mapped types with no reverse mapping:\n" + "\n".join(unmapped) diff --git a/soda-tests/tests/integration/test_conformance_identifiers.py b/soda-tests/tests/integration/test_conformance_identifiers.py index 014a75e2f..11574b46a 100644 --- a/soda-tests/tests/integration/test_conformance_identifiers.py +++ b/soda-tests/tests/integration/test_conformance_identifiers.py @@ -323,27 +323,29 @@ def test_mixed_case_columns_schema_preserves_case(data_source_test_helper: DataS @pytest.mark.parametrize("identifier", SPECIAL_IDENTIFIERS) def test_quote_default_handles_special_identifiers(identifier: str, data_source_test_helper: DataSourceTestHelper): - """quote_default must return a quoted, non-None identifier for each special pattern.""" + """quote_default must return a quoted form (not the bare identifier) for each special pattern.""" sql_dialect = data_source_test_helper.data_source_impl.sql_dialect quoted = sql_dialect.quote_default(identifier) - assert quoted is not None, f"quote_default returned None for '{identifier}'" assert quoted != identifier, f"quote_default returned bare identifier for '{identifier}'" @pytest.mark.parametrize("identifier", SPECIAL_IDENTIFIERS) def test_quote_for_ddl_handles_special_identifiers(identifier: str, data_source_test_helper: DataSourceTestHelper): - """quote_for_ddl must return a quoted, non-None identifier for each special pattern.""" + """quote_for_ddl must return a quoted form (not the bare identifier) for each special pattern.""" sql_dialect = data_source_test_helper.data_source_impl.sql_dialect quoted = sql_dialect.quote_for_ddl(identifier) - assert quoted is not None, f"quote_for_ddl returned None for '{identifier}'" assert quoted != identifier, f"quote_for_ddl returned bare identifier for '{identifier}'" @pytest.mark.parametrize("identifier", SPECIAL_IDENTIFIERS) def test_ddl_and_dml_quoting_both_preserve_identifier(identifier: str, data_source_test_helper: DataSourceTestHelper): - """Both DDL and DML quoting must preserve the original identifier string.""" + """Both DDL and DML quoting must preserve the original identifier string. + + Comparison is case-insensitive: some dialects fold identifier case during + quoting, but the *characters* must survive the round-trip. + """ sql_dialect = data_source_test_helper.data_source_impl.sql_dialect dml_quoted = sql_dialect.quote_default(identifier) ddl_quoted = sql_dialect.quote_for_ddl(identifier) - assert identifier in dml_quoted, f"DML quoting lost identifier: {dml_quoted}" - assert identifier in ddl_quoted, f"DDL quoting lost identifier: {ddl_quoted}" + assert identifier.casefold() in dml_quoted.casefold(), f"DML quoting lost identifier: {dml_quoted}" + assert identifier.casefold() in ddl_quoted.casefold(), f"DDL quoting lost identifier: {ddl_quoted}" diff --git a/soda-tests/tests/integration/test_conformance_types_dialect.py b/soda-tests/tests/integration/test_conformance_types_dialect.py index d74f4bc35..7faa956c3 100644 --- a/soda-tests/tests/integration/test_conformance_types_dialect.py +++ b/soda-tests/tests/integration/test_conformance_types_dialect.py @@ -21,7 +21,7 @@ from soda_core.common.data_source_impl import DataSourceImpl from soda_core.common.data_source_results import QueryResult from soda_core.common.metadata_types import SamplerType, SodaDataTypeName -from soda_core.common.sql_ast import COLUMN, FROM, RANDOM, REGEX_LIKE, SELECT, STAR +from soda_core.common.sql_ast import FROM, RANDOM, SELECT, STAR from soda_core.common.sql_dialect import SqlDialect # --------------------------------------------------------------------------- @@ -218,26 +218,11 @@ def test_schema_check_all_types(data_source_test_helper: DataSourceTestHelper): # --------------------------------------------------------------------------- -@pytest.mark.parametrize("sampler_type", list(SamplerType)) -def test_sampling_sql_generation(sampler_type: SamplerType, data_source_test_helper: DataSourceTestHelper): - """For each sampler type the adapter claims to support, the generated SQL must - be non-empty and parseable (used in a SELECT ... FROM table SAMPLE clause).""" - sql_dialect: SqlDialect = data_source_test_helper.data_source_impl.sql_dialect - - if not sql_dialect.supports_sampler(sampler_type): - pytest.skip(f"{sql_dialect.__class__.__name__} does not support {sampler_type.name}") - - sample_size = 10 if sampler_type == SamplerType.PERCENTAGE else 50 - sample_sql = sql_dialect._build_sample_sql(sampler_type, sample_size) - - assert sample_sql is not None, f"_build_sample_sql returned None for {sampler_type.name}" - assert len(sample_sql.strip()) > 0, f"_build_sample_sql returned empty string for {sampler_type.name}" - assert str(sample_size) in sample_sql, f"Sample size {sample_size} not found in generated SQL: {sample_sql}" - - @pytest.mark.parametrize("sampler_type", list(SamplerType)) def test_sampling_sql_executes(sampler_type: SamplerType, data_source_test_helper: DataSourceTestHelper): - """For each supported sampler type, generate a full SELECT with sampling and execute it.""" + """For each supported sampler type, generate a full SELECT with sampling and execute it. + Exercises the public sampling SQL path end-to-end: SAMPLE clause generation, + parameter substitution, and adapter execution.""" sql_dialect: SqlDialect = data_source_test_helper.data_source_impl.sql_dialect if not sql_dialect.supports_sampler(sampler_type): @@ -254,9 +239,8 @@ def test_sampling_sql_executes(sampler_type: SamplerType, data_source_test_helpe ] ) - result: QueryResult = data_source_test_helper.data_source_impl.execute_query(select_sql) - assert result is not None, "Sampled query returned None" - assert len(result.rows) >= 0, "Sampled query returned negative row count" + # Successful execution implies non-empty parseable SQL — no extra assertion needed. + data_source_test_helper.data_source_impl.execute_query(select_sql) # --------------------------------------------------------------------------- @@ -264,18 +248,6 @@ def test_sampling_sql_executes(sampler_type: SamplerType, data_source_test_helpe # --------------------------------------------------------------------------- -def test_regex_sql_generation(data_source_test_helper: DataSourceTestHelper): - """The adapter must generate valid regex SQL from a REGEX_LIKE expression.""" - sql_dialect: SqlDialect = data_source_test_helper.data_source_impl.sql_dialect - - regex_expr = REGEX_LIKE(expression=COLUMN("col_varchar"), regex_pattern="^[a-z]+$") - sql = sql_dialect._build_regex_like_sql(regex_expr) - - assert sql is not None, "regex SQL is None" - assert len(sql.strip()) > 0, "regex SQL is empty" - assert "col_varchar" in sql, f"Column name missing from regex SQL: {sql}" - - def test_regex_via_invalid_check(data_source_test_helper: DataSourceTestHelper): """Invalid check with regex must work end-to-end (the row with NULL is excluded, the two data rows match the pattern, so no invalids among non-null rows).""" @@ -362,7 +334,7 @@ def test_reverse_mapping_covers_forward(data_source_test_helper: DataSourceTestH if not found: broken.append(f"{soda_type.name} → '{ds_type}'") - assert broken == [], f"Forward-mapped types with no reverse path:\n" + "\n".join(broken) + assert broken == [], "Forward-mapped types with no reverse path:\n" + "\n".join(broken) def test_data_type_synonyms_internally_consistent(data_source_test_helper: DataSourceTestHelper): From 9114e400b875f6aecda1ce88211bc2ee02a07739 Mon Sep 17 00:00:00 2001 From: Paul Teehan Date: Wed, 29 Apr 2026 13:34:16 +0200 Subject: [PATCH 4/4] Apply black formatting CI black hook collapsed two adjacent f-string literals onto one line. Co-Authored-By: Claude Opus 4.7 (1M context) --- soda-tests/tests/integration/test_conformance_discovery.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/soda-tests/tests/integration/test_conformance_discovery.py b/soda-tests/tests/integration/test_conformance_discovery.py index aed2020f0..8607a85c1 100644 --- a/soda-tests/tests/integration/test_conformance_discovery.py +++ b/soda-tests/tests/integration/test_conformance_discovery.py @@ -357,8 +357,7 @@ def test_column_type_parameters_preserved(data_source_test_helper: DataSourceTes assert ts_col is not None, "Column ts_precision_3 not found" if ts_col.sql_data_type.datetime_precision is not None: assert ts_col.sql_data_type.datetime_precision >= 3, ( - f"ts_precision_3: expected datetime_precision >= 3, " - f"got {ts_col.sql_data_type.datetime_precision}" + f"ts_precision_3: expected datetime_precision >= 3, " f"got {ts_col.sql_data_type.datetime_precision}" ) ts_tz_col = cols_by_name.get("ts_tz_precision_6")