From 7588dc90a8642c549ebd48d59d982ee991e2b9e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ti=E1=BA=BFn?= <57807910+xxntti3n@users.noreply.github.com> Date: Thu, 23 Apr 2026 14:52:00 +0700 Subject: [PATCH 1/4] feat(bigquery): add execution_project parameter Add execution_project to BigQueryConnectionProperties to specify a separate billing project for query execution. --- .../bigquery_data_source_connection.py | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source_connection.py b/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source_connection.py index 08db31db7..e097db09e 100644 --- a/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source_connection.py +++ b/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source_connection.py @@ -27,6 +27,7 @@ class BigQueryConnectionProperties(DataSourceConnectionProperties, ABC): project_id: Optional[str] = Field(None, description="BigQuery project ID") + execution_project: Optional[str] = Field(None, description="BigQuery execution/billing project ID. If not set, uses project_id") storage_project_id: Optional[str] = Field(None, description="BigQuery storage project ID") location: Optional[str] = Field(None, description="BigQuery location") client_options: Optional[dict] = Field(None, description="Client options") @@ -77,6 +78,127 @@ class BigQueryDataSource(DataSourceBase, ABC): ] = Field(..., alias="connection", description="BigQuery connection configuration") +class BigQueryDataSourceConnection(DataSourceConnection): + def __init__(self, name: str, connection_properties: DataSourceConnectionProperties): + super().__init__(name, connection_properties) + + def _load_project_id_and_credentials(self, config: BigQueryConnectionProperties): + if isinstance(config, BigQueryContextAuth): + logger.info("Using application default credentials.") + self.credentials, self.project_id = default() + return + + if isinstance(config, BigQueryJSONFileAuth): + account_info_dict = json.load(open(config.account_info_json_path)) + elif isinstance(config, BigQueryJSONStringAuth): + account_info_dict = json.loads(config.account_info_json.get_secret_value()) + self.credentials = Credentials.from_service_account_info( + account_info_dict, + scopes=config.auth_scopes, + ) + self.project_id = account_info_dict.get("project_id") + + def _load_optional_impersonated_credentials(self, config: BigQueryConnectionProperties): + if config.impersonation_account: + logger.info("Using impersonation of Service Account.") + if config.delegates: + logger.info("Using Service Account delegates.") + delegates = config.delegates + else: + delegates = None + self.credentials = impersonated_credentials.Credentials( + source_credentials=self.credentials, + target_principal=str(config.impersonation_account), + target_scopes=config.auth_scopes, + delegates=delegates, + ) + + def _apply_optional_params(self, config: BigQueryConnectionProperties): + # Users can optionally overwrite in the connection properties + self.project_id = config.project_id if config.project_id else self.project_id + # execution_project is the project that will be billed for queries (aka billing project) + # If not set, defaults to project_id + self.execution_project = config.execution_project if config.execution_project else self.project_id + self.location = config.location + self.client_options = config.client_options + + # Storage project ID is currently not used, because the project is configured via the DQN in the data contract. + # When we implement discovery, we'll need to use this value. + self.storage_project_id = config.storage_project_id if config.storage_project_id else self.project_id + + self.labels = config.labels + + def _create_connection( + self, + config: BigQueryConnectionProperties, + ): + self._load_project_id_and_credentials(config) + self._load_optional_impersonated_credentials(config) + self._apply_optional_params(config) + + client_info = ClientInfo( + user_agent="soda-core", + ) + default_query_job_config = bigquery.QueryJobConfig(labels=self.labels) + self.client = bigquery.Client( + project=self.execution_project, + credentials=self.credentials, + default_query_job_config=default_query_job_config, + client_info=client_info, + location=config.location, + client_options=self.client_options, + ) + + return dbapi.Connection(self.client) + + def _format_rows(self, rows: list[Row]) -> list[tuple]: + return [self._format_row(row) for row in rows] + + def _format_row(self, row: Row) -> tuple: + return tuple(row.values()) + "https://www.googleapis.com/auth/cloud-platform", + "https://www.googleapis.com/auth/drive", + ], + description="Authentication scopes", + ) + impersonation_account: Optional[str] = Field(None, description="Impersonation account") + delegates: Optional[list[str]] = Field(None, description="Delegates") + + +class BigQueryJSONStringAuth(BigQueryConnectionProperties): + """BigQuery authentication using JSON string""" + + use_context_auth: Optional[Literal[False]] = Field(False, description=CONTEXT_AUTHENTICATION_DESCRIPTION) + account_info_json: SecretStr = Field(..., description="Service account JSON as string", min_length=1) + + +class BigQueryJSONFileAuth(BigQueryConnectionProperties): + """BigQuery authentication using JSON file path""" + + use_context_auth: Optional[Literal[False]] = Field(False, description=CONTEXT_AUTHENTICATION_DESCRIPTION) + account_info_json_path: str = Field(..., description="Path to service account JSON file", min_length=1) + + +class BigQueryContextAuth(BigQueryConnectionProperties): + """BigQuery authentication using context. + + If use_context_auth is True, then application default credentials will be used. + The user may optionally provide JSON credentials; they will be ignored. + """ + + use_context_auth: Literal[True] = Field(description=CONTEXT_AUTHENTICATION_DESCRIPTION) + + +class BigQueryDataSource(DataSourceBase, ABC): + type: Literal["bigquery"] = Field("bigquery") + + connection_properties: Union[ + BigQueryJSONStringAuth, + BigQueryJSONFileAuth, + BigQueryContextAuth, + ] = Field(..., alias="connection", description="BigQuery connection configuration") + + class BigQueryDataSourceConnection(DataSourceConnection): def __init__(self, name: str, connection_properties: DataSourceConnectionProperties): super().__init__(name, connection_properties) From b73e153b09e65eb9d7b2924f6057a13995ac5fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ti=E1=BA=BFn?= <57807910+xxntti3n@users.noreply.github.com> Date: Thu, 23 Apr 2026 15:10:03 +0700 Subject: [PATCH 2/4] update --- .../bigquery_data_source_connection.py | 118 ------------------ 1 file changed, 118 deletions(-) diff --git a/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source_connection.py b/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source_connection.py index e097db09e..7f28281fa 100644 --- a/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source_connection.py +++ b/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source_connection.py @@ -154,123 +154,5 @@ def _create_connection( def _format_rows(self, rows: list[Row]) -> list[tuple]: return [self._format_row(row) for row in rows] - def _format_row(self, row: Row) -> tuple: - return tuple(row.values()) - "https://www.googleapis.com/auth/cloud-platform", - "https://www.googleapis.com/auth/drive", - ], - description="Authentication scopes", - ) - impersonation_account: Optional[str] = Field(None, description="Impersonation account") - delegates: Optional[list[str]] = Field(None, description="Delegates") - - -class BigQueryJSONStringAuth(BigQueryConnectionProperties): - """BigQuery authentication using JSON string""" - - use_context_auth: Optional[Literal[False]] = Field(False, description=CONTEXT_AUTHENTICATION_DESCRIPTION) - account_info_json: SecretStr = Field(..., description="Service account JSON as string", min_length=1) - - -class BigQueryJSONFileAuth(BigQueryConnectionProperties): - """BigQuery authentication using JSON file path""" - - use_context_auth: Optional[Literal[False]] = Field(False, description=CONTEXT_AUTHENTICATION_DESCRIPTION) - account_info_json_path: str = Field(..., description="Path to service account JSON file", min_length=1) - - -class BigQueryContextAuth(BigQueryConnectionProperties): - """BigQuery authentication using context. - - If use_context_auth is True, then application default credentials will be used. - The user may optionally provide JSON credentials; they will be ignored. - """ - - use_context_auth: Literal[True] = Field(description=CONTEXT_AUTHENTICATION_DESCRIPTION) - - -class BigQueryDataSource(DataSourceBase, ABC): - type: Literal["bigquery"] = Field("bigquery") - - connection_properties: Union[ - BigQueryJSONStringAuth, - BigQueryJSONFileAuth, - BigQueryContextAuth, - ] = Field(..., alias="connection", description="BigQuery connection configuration") - - -class BigQueryDataSourceConnection(DataSourceConnection): - def __init__(self, name: str, connection_properties: DataSourceConnectionProperties): - super().__init__(name, connection_properties) - - def _load_project_id_and_credentials(self, config: BigQueryConnectionProperties): - if isinstance(config, BigQueryContextAuth): - logger.info("Using application default credentials.") - self.credentials, self.project_id = default() - return - - if isinstance(config, BigQueryJSONFileAuth): - account_info_dict = json.load(open(config.account_info_json_path)) - elif isinstance(config, BigQueryJSONStringAuth): - account_info_dict = json.loads(config.account_info_json.get_secret_value()) - self.credentials = Credentials.from_service_account_info( - account_info_dict, - scopes=config.auth_scopes, - ) - self.project_id = account_info_dict.get("project_id") - - def _load_optional_impersonated_credentials(self, config: BigQueryConnectionProperties): - if config.impersonation_account: - logger.info("Using impersonation of Service Account.") - if config.delegates: - logger.info("Using Service Account delegates.") - delegates = config.delegates - else: - delegates = None - self.credentials = impersonated_credentials.Credentials( - source_credentials=self.credentials, - target_principal=str(config.impersonation_account), - target_scopes=config.auth_scopes, - delegates=delegates, - ) - - def _apply_optional_params(self, config: BigQueryConnectionProperties): - # Users can optionally overwrite in the connection properties - self.project_id = config.project_id if config.project_id else self.project_id - self.location = config.location - self.client_options = config.client_options - - # Storage project ID is currently not used, because the project is configured via the DQN in the data contract. - # When we implement discovery, we'll need to use this value. - self.storage_project_id = config.storage_project_id if config.storage_project_id else self.project_id - - self.labels = config.labels - - def _create_connection( - self, - config: BigQueryConnectionProperties, - ): - self._load_project_id_and_credentials(config) - self._load_optional_impersonated_credentials(config) - self._apply_optional_params(config) - - client_info = ClientInfo( - user_agent="soda-core", - ) - default_query_job_config = bigquery.QueryJobConfig(labels=self.labels) - self.client = bigquery.Client( - project=self.project_id, - credentials=self.credentials, - default_query_job_config=default_query_job_config, - client_info=client_info, - location=config.location, - client_options=self.client_options, - ) - - return dbapi.Connection(self.client) - - def _format_rows(self, rows: list[Row]) -> list[tuple]: - return [self._format_row(row) for row in rows] - def _format_row(self, row: Row) -> tuple: return tuple(row.values()) From 6effb60dc4a8125823359ea2dbee391e5c16b78d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ti=E1=BA=BFn?= <57807910+xxntti3n@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:30:49 +0700 Subject: [PATCH 3/4] add test case for bigquery connection source --- soda-tests/tests/integration/test_bigquery | 468 +++++++++++++++++++++ 1 file changed, 468 insertions(+) create mode 100644 soda-tests/tests/integration/test_bigquery diff --git a/soda-tests/tests/integration/test_bigquery b/soda-tests/tests/integration/test_bigquery new file mode 100644 index 000000000..a2d44f997 --- /dev/null +++ b/soda-tests/tests/integration/test_bigquery @@ -0,0 +1,468 @@ +import os +import tempfile + +import pytest +from helpers.test_connection import TestConnection + +# define environment variables used in test cases +BIGQUERY_ACCOUNT_INFO_JSON = os.getenv("BIGQUERY_ACCOUNT_INFO_JSON", "") +BIGQUERY_LOCATION = os.getenv("BIGQUERY_LOCATION", "US") +with tempfile.NamedTemporaryFile(delete=False) as temp_file: + temp_file.write(BIGQUERY_ACCOUNT_INFO_JSON.encode()) + BIGQUERY_ACCOUNT_INFO_JSON_PATH = temp_file.name + + +# define test cases and expected behavior (passing unless otherwise specified) +test_connections: list[TestConnection] = [ + TestConnection( # correct connection, should work + test_name="correct_json", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + location: '{BIGQUERY_LOCATION}' + """, + ), + TestConnection( # missing required field, should fail + test_name="yaml_missing_required", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_jsosn: '{BIGQUERY_ACCOUNT_INFO_JSON}' + location: '{BIGQUERY_LOCATION}' + """, + valid_yaml=False, + expected_yaml_error="""BigQueryJSONStringAuth.account_info_json\n Field required [type=missing, input_value={\'account_info_jsosn\'""", + ), + TestConnection( # passing bad credentials, should parse, but fail to connect + test_name="incorrect_json", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: 'BAD_CREDENTIALS' + location: '{BIGQUERY_LOCATION}' + """, + valid_connection_params=False, + expected_connection_error="Could not connect to 'BIGQUERY_TEST_DS'", + ), + TestConnection( # passing bad path, should parse, but fail to connect + test_name="json_path_issing", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json_path: 'missing.json' + location: '{BIGQUERY_LOCATION}' + """, + valid_connection_params=False, + expected_connection_error="No such file or directory", + ), + TestConnection( # passing good path, should work + test_name="correct_json_path", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json_path: '{BIGQUERY_ACCOUNT_INFO_JSON_PATH}' + location: '{BIGQUERY_LOCATION}' + """, + ), + TestConnection( # use application default credentials, should fail + # note if you have credentials in your env, you should remove them if you want this test to pass + # otherwise you'll get a different error and the test will fail + test_name="app_default_creds", + connection_yaml_str=""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + use_context_auth: true + """, + valid_connection_params=False, + expected_connection_error="Your default credentials were not found", + ), + TestConnection( # impersonation account, should fail at query stage + test_name="impersonation_account", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + impersonation_account: 'test@test.com' + """, + query_should_succeed=False, + expected_query_error="Unable to acquire impersonated credentials", + ), + TestConnection( # prove that delegates are propagating correctly + test_name="impersonation_account_with_invalid_delegates", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + impersonation_account: 'test@test.com' + delegates: ['invalid_delegate'] + """, + query_should_succeed=False, + expected_query_error="Invalid form of account ID invalid_delegate", + ), + TestConnection( # prove that extra fields are accepted and passed through without error + test_name="extra_field_accepted", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + extra_field: 'extra_value' + """, + ), + TestConnection( # prove that custom project_id is being set and used + test_name="custom_project_id", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + project_id: 'bigquery-public-data' + """, + query_should_succeed=False, + expected_query_error="User does not have bigquery.jobs.create permission in project bigquery-public-data", + ), + TestConnection( # prove that location is being set at all + test_name="new_invalid_location", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + location: 'XX' + """, + query_should_succeed=False, + expected_query_error="Location XX does not support this operation", + ), + TestConnection( # prove that a valid location is propagating correctly + test_name="new_valid_location", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + location: 'asia-east1' + """, + ), + TestConnection( # prove that auth scopes are propagating correctly + test_name="invalid_auth_scopes", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + auth_scopes: ['invalid'] + """, + query_should_succeed=False, + expected_query_error="No access token in response", + ), + TestConnection( # prove that labels are propagating correctly + test_name="invalid_labels", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + labels: {{'-invalid-': "invalid"}} + """, + query_should_succeed=False, + expected_query_error='Label key "-invalid-" has invalid characters.', + ), + TestConnection( # prove that client options are propagating correctly + test_name="invalid_client_options", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + client_options: {{'-invalid-': "invalid"}} + """, + valid_connection_params=False, + expected_connection_error="ClientOptions does not accept an option '-invalid-'", + ), + TestConnection( # prove that execution_project defaults to project_id when not set + test_name="execution_project_defaults_to_project_id", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + project_id: 'bigquery-public-data' + """, + query_should_succeed=False, + expected_query_error="User does not have bigquery.jobs.create permission in project bigquery-public-data", + ), + TestConnection( # prove that execution_project is being used for billing + test_name="custom_execution_project", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + execution_project: 'bigquery-public-data' + """, + query_should_succeed=False, + expected_query_error="User does not have bigquery.jobs.create permission in project bigquery-public-data", + ), + TestConnection( # prove that execution_project takes precedence over project_id for billing + test_name="execution_project_overrides_project_id", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + project_id: 'bigquery-public-data' + execution_project: 'bigquery-public-data' + """, + query_should_succeed=False, + expected_query_error="User does not have bigquery.jobs.create permission in project bigquery-public-data", + ), +] + + +# run tests. parameterization means each test case will show up as an individual test +@pytest.mark.parametrize("test_connection", test_connections, ids=[tc.test_name for tc in test_connections]) +def test_bigquery_connections(test_connection: TestConnection): + test_connection.test() +import os +import tempfile + +import pytest +from helpers.test_connection import TestConnection + +# define environment variables used in test cases +BIGQUERY_ACCOUNT_INFO_JSON = os.getenv("BIGQUERY_ACCOUNT_INFO_JSON", "") +BIGQUERY_LOCATION = os.getenv("BIGQUERY_LOCATION", "US") +with tempfile.NamedTemporaryFile(delete=False) as temp_file: + temp_file.write(BIGQUERY_ACCOUNT_INFO_JSON.encode()) + BIGQUERY_ACCOUNT_INFO_JSON_PATH = temp_file.name + + +# define test cases and expected behavior (passing unless otherwise specified) +test_connections: list[TestConnection] = [ + TestConnection( # correct connection, should work + test_name="correct_json", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + location: '{BIGQUERY_LOCATION}' + """, + ), + TestConnection( # missing required field, should fail + test_name="yaml_missing_required", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_jsosn: '{BIGQUERY_ACCOUNT_INFO_JSON}' + location: '{BIGQUERY_LOCATION}' + """, + valid_yaml=False, + expected_yaml_error="""BigQueryJSONStringAuth.account_info_json\n Field required [type=missing, input_value={\'account_info_jsosn\'""", + ), + TestConnection( # passing bad credentials, should parse, but fail to connect + test_name="incorrect_json", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: 'BAD_CREDENTIALS' + location: '{BIGQUERY_LOCATION}' + """, + valid_connection_params=False, + expected_connection_error="Could not connect to 'BIGQUERY_TEST_DS'", + ), + TestConnection( # passing bad path, should parse, but fail to connect + test_name="json_path_issing", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json_path: 'missing.json' + location: '{BIGQUERY_LOCATION}' + """, + valid_connection_params=False, + expected_connection_error="No such file or directory", + ), + TestConnection( # passing good path, should work + test_name="correct_json_path", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json_path: '{BIGQUERY_ACCOUNT_INFO_JSON_PATH}' + location: '{BIGQUERY_LOCATION}' + """, + ), + TestConnection( # use application default credentials, should fail + # note if you have credentials in your env, you should remove them if you want this test to pass + # otherwise you'll get a different error and the test will fail + test_name="app_default_creds", + connection_yaml_str=""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + use_context_auth: true + """, + valid_connection_params=False, + expected_connection_error="Your default credentials were not found", + ), + TestConnection( # impersonation account, should fail at query stage + test_name="impersonation_account", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + impersonation_account: 'test@test.com' + """, + query_should_succeed=False, + expected_query_error="Unable to acquire impersonated credentials", + ), + TestConnection( # prove that delegates are propagating correctly + test_name="impersonation_account_with_invalid_delegates", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + impersonation_account: 'test@test.com' + delegates: ['invalid_delegate'] + """, + query_should_succeed=False, + expected_query_error="Invalid form of account ID invalid_delegate", + ), + TestConnection( # prove that extra fields are accepted and passed through without error + test_name="extra_field_accepted", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + extra_field: 'extra_value' + """, + ), + TestConnection( # prove that custom project_id is being set and used + test_name="custom_project_id", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + project_id: 'bigquery-public-data' + """, + query_should_succeed=False, + expected_query_error="User does not have bigquery.jobs.create permission in project bigquery-public-data", + ), + TestConnection( # prove that location is being set at all + test_name="new_invalid_location", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + location: 'XX' + """, + query_should_succeed=False, + expected_query_error="Location XX does not support this operation", + ), + TestConnection( # prove that a valid location is propagating correctly + test_name="new_valid_location", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + location: 'asia-east1' + """, + ), + TestConnection( # prove that auth scopes are propagating correctly + test_name="invalid_auth_scopes", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + auth_scopes: ['invalid'] + """, + query_should_succeed=False, + expected_query_error="No access token in response", + ), + TestConnection( # prove that labels are propagating correctly + test_name="invalid_labels", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + labels: {{'-invalid-': "invalid"}} + """, + query_should_succeed=False, + expected_query_error='Label key "-invalid-" has invalid characters.', + ), + TestConnection( # prove that client options are propagating correctly + test_name="invalid_client_options", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + client_options: {{'-invalid-': "invalid"}} + """, + valid_connection_params=False, + expected_connection_error="ClientOptions does not accept an option '-invalid-'", + ), + TestConnection( # prove that execution_project defaults to project_id when not set + test_name="execution_project_defaults_to_project_id", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + project_id: 'bigquery-public-data' + """, + query_should_succeed=False, + expected_query_error="User does not have bigquery.jobs.create permission in project bigquery-public-data", + ), + TestConnection( # prove that execution_project is being used for billing + test_name="custom_execution_project", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + execution_project: 'bigquery-public-data' + """, + query_should_succeed=False, + expected_query_error="User does not have bigquery.jobs.create permission in project bigquery-public-data", + ), + TestConnection( # prove that execution_project takes precedence over project_id for billing + test_name="execution_project_overrides_project_id", + connection_yaml_str=f""" + type: bigquery + name: BIGQUERY_TEST_DS + connection: + account_info_json: '{BIGQUERY_ACCOUNT_INFO_JSON}' + project_id: 'bigquery-public-data' + execution_project: 'bigquery-public-data' + """, + query_should_succeed=False, + expected_query_error="User does not have bigquery.jobs.create permission in project bigquery-public-data", + ), +] + + +# run tests. parameterization means each test case will show up as an individual test +@pytest.mark.parametrize("test_connection", test_connections, ids=[tc.test_name for tc in test_connections]) +def test_bigquery_connections(test_connection: TestConnection): + test_connection.test() From 5ad805c58ff760543b6a1f16569aaaf47ad5288a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ti=E1=BA=BFn?= <57807910+xxntti3n@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:36:45 +0700 Subject: [PATCH 4/4] feat: Add validation for None project_id --- .../bigquery_data_source_connection.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source_connection.py b/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source_connection.py index 7f28281fa..94a8b8933 100644 --- a/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source_connection.py +++ b/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source_connection.py @@ -27,7 +27,9 @@ class BigQueryConnectionProperties(DataSourceConnectionProperties, ABC): project_id: Optional[str] = Field(None, description="BigQuery project ID") - execution_project: Optional[str] = Field(None, description="BigQuery execution/billing project ID. If not set, uses project_id") + execution_project: Optional[str] = Field( + None, description="BigQuery execution/billing project ID. If not set, uses project_id" + ) storage_project_id: Optional[str] = Field(None, description="BigQuery storage project ID") location: Optional[str] = Field(None, description="BigQuery location") client_options: Optional[dict] = Field(None, description="Client options") @@ -118,7 +120,16 @@ def _apply_optional_params(self, config: BigQueryConnectionProperties): self.project_id = config.project_id if config.project_id else self.project_id # execution_project is the project that will be billed for queries (aka billing project) # If not set, defaults to project_id - self.execution_project = config.execution_project if config.execution_project else self.project_id + if config.execution_project: + self.execution_project = config.execution_project + else: + # Ensure project_id is set before using it as fallback + if not self.project_id: + raise ValueError( + "Either execution_project or project_id must be set. " + "When using context authentication, ensure your default credentials include a project ID." + ) + self.execution_project = self.project_id self.location = config.location self.client_options = config.client_options