-
Notifications
You must be signed in to change notification settings - Fork 24
feature/union-data #71
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
0750e91
5e7b061
8731319
7996d58
edb9fb5
6e48fb4
0847422
b80a7d6
5ee26fb
66af67a
d374b1c
49031b5
b866815
5393797
fd38316
1315d4a
5efbf7a
210b65c
a66527a
453f703
b3f000c
7712c9c
5aab18a
05653d2
6139bcd
a5f4825
b2ee9bf
7ec576b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -71,3 +71,5 @@ env/ | |
| env.bak/ | ||
| venv/ | ||
| venv.bak/ | ||
|
|
||
| CLAUDE.md | ||
Large diffs are not rendered by default.
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| {# apply_source_relation: public entry point that emits the `source_relation` select-list column. #} | ||
| {# Takes no arguments and delegates through adapter.dispatch in the 'github' namespace, so the macro that actually renders is default__apply_source_relation or an adapter-prefixed variant of it. #} | ||
| {% macro apply_source_relation() -%} | ||
|
|
||
| {{ adapter.dispatch('apply_source_relation', 'github') () }} | ||
|
|
||
| {%- endmacro %} | ||
|
|
||
| {# default__apply_source_relation: default implementation of apply_source_relation. #} | ||
| {# Renders one select-list entry aliased `source_relation`. The output starts with a leading comma, so it must follow another column in the select list. #} | ||
| {# - When the `github_sources` var is a non-empty list, the existing `_dbt_source_relation` column is passed through. #} | ||
| {# - Otherwise a literal '<database>.<schema>' is built from the `github_database` var (default: target.database) and the `github_schema` var (default: "github"), joined with the SQL `||` operator. #} | ||
| {% macro default__apply_source_relation() -%} | ||
|
|
||
| {% if var('github_sources', []) != [] %} | ||
| , _dbt_source_relation as source_relation | ||
| {% else %} | ||
| , '{{ var("github_database", target.database) }}' || '.'|| '{{ var("github_schema", "github") }}' as source_relation | ||
| {% endif %} | ||
|
|
||
| {%- endmacro %} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,76 @@ | ||
| {# github_union_connections: public entry point that builds the SELECT for one source table, either unioned across several connections or read from a single source. #} | ||
| {# Arguments are forwarded unchanged through adapter.dispatch in the 'github' namespace: #} | ||
| {#   connection_dictionary - iterable of connection entries; when falsy the single-source path is taken. #} | ||
| {#   single_source_name    - dbt source name. #} | ||
| {#   single_table_name     - table name inside that source. #} | ||
| {#   default_identifier    - table identifier to look up when sources are not defined in YAML. #} | ||
| {% macro github_union_connections(connection_dictionary, single_source_name, single_table_name, default_identifier) %} | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we remove the I don't think it's necessary for the other packages we plan to roll union_data out to as well (I think)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From standup, we decided to use |
||
|
|
||
| {{ adapter.dispatch('github_union_connections', 'github') (connection_dictionary, single_source_name, single_table_name, default_identifier) }} | ||
|
|
||
| {%- endmacro %} | ||
|
|
||
| {# default__github_union_connections: default implementation of github_union_connections. #} | ||
| {# Union path (connection_dictionary is truthy): for each connection, look up a relation with adapter.get_relation. #} | ||
| {#   - If the `has_defined_sources` var is true, database/schema/identifier come from source(connection.name, single_table_name). #} | ||
| {#   - Otherwise database falls back to target.database, schema falls back to single_source_name, and identifier is default_identifier. #} | ||
| {#   Relations that exist are passed to github.github_union_relations; if none exist, a warning is raised (unless suppressed) and an empty placeholder select is rendered. #} | ||
| {# Single-source path (connection_dictionary is falsy): look up source(single_source_name, single_table_name); select all of its columns if it exists, otherwise warn and render the same placeholder select. #} | ||
| {# The placeholder selects a null string column `_dbt_source_relation` with `limit 0`, or `limit 1` when target.type is 'redshift'. #} | ||
| {# Warnings are only raised when `execute` is true and the `fivetran__remove_empty_table_warnings` var is not set. #} | ||
| {# NOTE(review): `identifier_var` is only rendered into an emitted SQL comment; it is not used for the relation lookup - confirm that is intended. #} | ||
| {# NOTE(review): the union-path warning prints the source/table names as given, while the single-source warning upper-cases them - confirm whether the two should match. #} | ||
| {% macro default__github_union_connections(connection_dictionary, single_source_name, single_table_name, default_identifier) %} | ||
|
|
||
| {% if connection_dictionary %} | ||
| {# Union path: resolve one relation per configured connection and union the ones that exist #} | ||
| {%- set relations = [] -%} | ||
| {%- for connection in connection_dictionary -%} | ||
|
fivetran-catfritz marked this conversation as resolved.
Outdated
|
||
|
|
||
| {%- set relation=adapter.get_relation( | ||
| database=source(connection.name, single_table_name).database, | ||
| schema=source(connection.name, single_table_name).schema, | ||
| identifier=source(connection.name, single_table_name).identifier) | ||
| if var('has_defined_sources', false) | ||
|
|
||
| else adapter.get_relation( | ||
| database=connection.database if connection.database else target.database, | ||
| schema=connection.schema if connection.schema else single_source_name, | ||
| identifier=default_identifier | ||
| ) | ||
| -%} | ||
|
|
||
| {%- if relation is not none -%} | ||
| {%- do relations.append(relation) -%} | ||
| {%- endif -%} | ||
|
|
||
| {%- endfor -%} | ||
|
|
||
| {%- if relations != [] -%} | ||
| {{ github.github_union_relations(relations) }} | ||
| {%- else -%} | ||
| {% if execute and not var('fivetran__remove_empty_table_warnings', false) -%} | ||
| {{ exceptions.warn("\n\nPlease be aware: The " ~ single_source_name ~ "." ~ single_table_name ~ " table was not found in your schema(s). The Fivetran Data Model will create a completely empty staging model as to not break downstream transformations. To turn off these warnings, set the `fivetran__remove_empty_table_warnings` variable to TRUE (see https://github.com/fivetran/dbt_fivetran_utils/tree/releases/v0.4.latest#union_data-source for details).\n") }} | ||
| {% endif -%} | ||
| select | ||
| cast(null as {{ dbt.type_string() }}) as _dbt_source_relation | ||
| limit {{ '0' if target.type != 'redshift' else '1' }} | ||
| {%- endif -%} | ||
|
|
||
| {% else %} | ||
| {# Single-source path: read directly from the one declared dbt source when it exists #} | ||
|
|
||
| {% set identifier_var = single_source_name + "_" + single_table_name + "_identifier"%} | ||
|
|
||
| {%- set relation=adapter.get_relation( | ||
| database=source(single_source_name, single_table_name).database, | ||
| schema=source(single_source_name, single_table_name).schema, | ||
| identifier=source(single_source_name, single_table_name).identifier | ||
| ) -%} | ||
| -- ** Values passed to adapter.get_relation: | ||
| {{ '-- full-identifier_var: ' ~ identifier_var }} | ||
| {{ '-- database: ' ~ source(single_source_name, single_table_name).database }} | ||
| {{ '-- schema: ' ~ source(single_source_name, single_table_name).schema }} | ||
| {{ '-- identifier: ' ~ source(single_source_name, single_table_name).identifier }} | ||
|
|
||
| {% if relation is not none -%} | ||
| select | ||
| {{ dbt_utils.star(from=source(single_source_name, single_table_name)) }} | ||
| from {{ source(single_source_name, single_table_name) }} as source_table | ||
|
|
||
| {% else %} | ||
| {% if execute and not var('fivetran__remove_empty_table_warnings', false) -%} | ||
| {{ exceptions.warn("\n\nPlease be aware: The " ~ single_source_name|upper ~ "." ~ single_table_name|upper ~ " table was not found in your schema(s). The Fivetran Data Model will create a completely empty staging model as to not break downstream transformations. To turn off these warnings, set the `fivetran__remove_empty_table_warnings` variable to TRUE (see https://github.com/fivetran/dbt_fivetran_utils/tree/releases/v0.4.latest#union_data-source for details).\n") }} | ||
| {% endif -%} | ||
|
|
||
| select | ||
| cast(null as {{ dbt.type_string() }}) as _dbt_source_relation | ||
| limit {{ '0' if target.type != 'redshift' else '1' }} | ||
| {%- endif -%} | ||
| {% endif -%} | ||
|
|
||
| {%- endmacro %} | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,131 @@ | ||
| {# Adapted from dbt_utils.union_relations() #} | ||
| {# github_union_relations: public entry point that renders a `union all` query over the given relations. #} | ||
| {# All arguments are forwarded positionally through adapter.dispatch in the 'github' namespace and the dispatched macro's result is returned: #} | ||
| {#   relations          - list of relations to union. #} | ||
| {#   aliases            - optional list of names to select from in place of each relation (matched by position). #} | ||
| {#   column_override    - optional mapping of column name to the data type to cast it to. #} | ||
| {#   include / exclude  - optional column-name filters; supplying both is an error. #} | ||
| {#   source_column_name - name of the added column identifying each row's origin; none omits the column. #} | ||
| {#   where              - optional filter applied to every per-relation select. #} | ||
|
|
||
| {%- macro github_union_relations(relations, aliases=none, column_override=none, include=[], exclude=[], source_column_name='_dbt_source_relation', where=none) -%} | ||
| {{ return(adapter.dispatch('github_union_relations', 'github')(relations, aliases, column_override, include, exclude, source_column_name, where)) }} | ||
| {% endmacro %} | ||
|
|
||
| {# default__github_union_relations: default implementation of github_union_relations. #} | ||
| {# Steps: #} | ||
| {#   1. Raise a compiler error if both `include` and `exclude` are supplied. #} | ||
| {#   2. Return an empty string when `execute` is false, so no database calls happen during parsing. #} | ||
| {#   3. For every relation, read its columns with adapter.get_columns_in_relation and keep those passing the include/exclude filter (compared lower-cased). #} | ||
| {#      `relation_columns` records the kept column names per relation; `column_superset` maps each column name to a column object. #} | ||
| {#      When the same string column appears in several relations, the one with the larger string_size() is kept. #} | ||
| {#   4. When flags.WHICH is 'run' or 'build', raise a compiler error if a filter was supplied and no columns remain. #} | ||
| {#   5. Render one parenthesised select per relation, joined by `union all`. Each column in the superset is cast to its type (or the `column_override` type); columns a relation lacks are selected as null. #} | ||
| {# The source column (when source_column_name is not none) holds the literal '<database>.<schema>' of the relation. #} | ||
| {# NOTE(review): the inline comment near the `from` clause says the table alias is the only change from dbt_utils.union_relations(); confirm whether the '<database>.<schema>' source-column value also differs from upstream. #} | ||
| {%- macro default__github_union_relations(relations, aliases=none, column_override=none, include=[], exclude=[], source_column_name='_dbt_source_relation', where=none) -%} | ||
|
|
||
| {%- if exclude and include -%} | ||
| {{ exceptions.raise_compiler_error("Both an exclude and include list were provided to the `union` macro. Only one is allowed") }} | ||
| {%- endif -%} | ||
|
|
||
| {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. -#} | ||
| {%- if not execute %} | ||
| {{ return('') }} | ||
| {% endif -%} | ||
|
|
||
| {%- set column_override = column_override if column_override is not none else {} -%} | ||
|
|
||
| {%- set relation_columns = {} -%} | ||
| {%- set column_superset = {} -%} | ||
| {%- set all_excludes = [] -%} | ||
| {%- set all_includes = [] -%} | ||
|
|
||
| {%- if exclude -%} | ||
| {%- for exc in exclude -%} | ||
| {%- do all_excludes.append(exc | lower) -%} | ||
| {%- endfor -%} | ||
| {%- endif -%} | ||
|
|
||
| {%- if include -%} | ||
| {%- for inc in include -%} | ||
| {%- do all_includes.append(inc | lower) -%} | ||
| {%- endfor -%} | ||
| {%- endif -%} | ||
|
|
||
| {%- for relation in relations -%} | ||
|
|
||
| {%- do relation_columns.update({relation: []}) -%} | ||
|
|
||
| {%- do dbt_utils._is_relation(relation, 'github_union_relations') -%} | ||
| {%- do dbt_utils._is_ephemeral(relation, 'github_union_relations') -%} | ||
| {%- set cols = adapter.get_columns_in_relation(relation) -%} | ||
| {%- for col in cols -%} | ||
|
|
||
| {#- If an exclude list was provided and the column is in the list, do nothing -#} | ||
| {%- if exclude and col.column | lower in all_excludes -%} | ||
|
|
||
| {#- If an include list was provided and the column is not in the list, do nothing -#} | ||
| {%- elif include and col.column | lower not in all_includes -%} | ||
|
|
||
| {#- Otherwise add the column to the column superset -#} | ||
| {%- else -%} | ||
|
|
||
| {#- update the list of columns in this relation -#} | ||
| {%- do relation_columns[relation].append(col.column) -%} | ||
|
|
||
| {%- if col.column in column_superset -%} | ||
|
|
||
| {%- set stored = column_superset[col.column] -%} | ||
| {%- if col.is_string() and stored.is_string() and col.string_size() > stored.string_size() -%} | ||
|
|
||
| {%- do column_superset.update({col.column: col}) -%} | ||
|
|
||
| {%- endif %} | ||
|
|
||
| {%- else -%} | ||
|
|
||
| {%- do column_superset.update({col.column: col}) -%} | ||
|
|
||
| {%- endif -%} | ||
|
|
||
| {%- endif -%} | ||
|
|
||
| {%- endfor -%} | ||
| {%- endfor -%} | ||
|
|
||
| {%- set ordered_column_names = column_superset.keys() -%} | ||
| {%- set dbt_command = flags.WHICH -%} | ||
|
|
||
|
|
||
| {% if dbt_command in ['run', 'build'] %} | ||
| {% if (include | length > 0 or exclude | length > 0) and not column_superset.keys() %} | ||
| {%- set relations_string -%} | ||
| {%- for relation in relations -%} | ||
| {{ relation.name }} | ||
| {%- if not loop.last %}, {% endif -%} | ||
| {%- endfor -%} | ||
| {%- endset -%} | ||
|
|
||
| {%- set error_message -%} | ||
| There were no columns found to union for relations {{ relations_string }} | ||
| {%- endset -%} | ||
|
|
||
| {{ exceptions.raise_compiler_error(error_message) }} | ||
| {%- endif -%} | ||
| {%- endif -%} | ||
|
|
||
| {%- for relation in relations %} | ||
|
|
||
| ( | ||
| select | ||
|
|
||
| {%- if source_column_name is not none %} | ||
| cast({{ dbt.string_literal(relation.database ~ '.' ~ relation.schema) }} as {{ dbt.type_string() }}) as {{ source_column_name }}, | ||
| {%- endif %} | ||
|
|
||
| {% for col_name in ordered_column_names -%} | ||
|
|
||
| {%- set col = column_superset[col_name] %} | ||
| {%- set col_type = column_override.get(col.column, col.data_type) %} | ||
| {%- set col_name = adapter.quote(col_name) if col_name in relation_columns[relation] else 'null' %} | ||
| cast({{ col_name }} as {{ col_type }}) as {{ col.quoted }} {% if not loop.last %},{% endif -%} | ||
|
|
||
| {%- endfor %} | ||
|
|
||
| {# This alias is the only addition made to the dbt_utils.union_relations() code. Avoids errors if the table is named a reserved keyword #} | ||
| from {{ aliases[loop.index0] if aliases else relation }} as unioned_relation_{{ loop.index }} | ||
|
|
||
| {% if where -%} | ||
| where {{ where }} | ||
| {%- endif %} | ||
| ) | ||
|
|
||
| {% if not loop.last -%} | ||
| union all | ||
| {% endif -%} | ||
|
|
||
| {%- endfor -%} | ||
|
|
||
| {%- endmacro -%} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| {# Reusable dbt docs block named `source_relation`; presumably referenced as the description of the `source_relation` column - verify against the model YAML. #} | ||
| {% docs source_relation %} Identifies the record's source. {% enddocs %} |
Uh oh!
There was an error while loading. Please reload this page.