Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
column_1,column_2,column_3
a,b,c
a , b , c
a ,b, c
a, b, c
5 changes: 5 additions & 0 deletions integration_tests/models/sql/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,11 @@ models:
- assert_equal:
actual: actual_all_columns_list
expected: expected_all_columns

- name: test_generate_surrogate_key_trim
description: "Test that trim parameter correctly removes leading/trailing whitespace before hashing"
# Note: the aggregate assertions live in tests/sql/test_generate_surrogate_key_trim_distinct_count.sql
# because expression_is_true cannot use aggregate functions in a WHERE clause.

- name: test_union
data_tests:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{#- Columns hashed into both surrogate keys; kept in one place so the two calls stay in sync. -#}
{%- set key_columns = ['column_1', 'column_2', 'column_3'] -%}

with seed_rows as (

    select
        column_1,
        column_2,
        column_3
    from {{ ref('data_generate_surrogate_key_trim') }}

)

select
    -- Default call (trim not requested): whitespace variants are expected to hash differently.
    {{ dbt_utils.generate_surrogate_key(key_columns) }} as key_no_trim,

    -- trim=true: whitespace variants are expected to collapse to the same hash.
    {{ dbt_utils.generate_surrogate_key(key_columns, trim=true) }} as key_with_trim,

    -- Raw inputs are passed through so a failing row can be inspected.
    column_1,
    column_2,
    column_3

from seed_rows
2 changes: 1 addition & 1 deletion integration_tests/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ integration_tests:
database: "{{ env_var('SNOWFLAKE_DATABASE') }}"
warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}"
schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}"
threads: 10
threads: 10
7 changes: 7 additions & 0 deletions integration_tests/setup_env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Exports Postgres connection settings for running the dbt integration tests locally.
#
# Usage:  source integration_tests/setup_env.sh
#
# The file must be *sourced*: exports made by a directly executed script live
# only in that child process and are gone when it exits.
# NOTE(review): the values are literal placeholders ("dummy"); presumably they are
# read via env_var() by a postgres target in profiles.yml — confirm against that file.

export POSTGRES_HOST=localhost
export POSTGRES_USER=dummy
export POSTGRES_PASS=dummy
export POSTGRES_PORT=5432
export POSTGRES_DATABASE=dummy

# BASH_SOURCE[0] equals $0 only when the file is executed rather than sourced.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    # Do not report success here: the caller's environment was not modified.
    echo "Warning: this script was executed, so the variables were not set in your shell. Run 'source ${BASH_SOURCE[0]}' instead." >&2
else
    echo "Environment variables set for dbt testing"
fi
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Singular test: returns a row (i.e. fails) unless the distinct-key counts match expectations.
--   key_with_trim -> exactly 1 distinct hash (inputs are identical once whitespace is trimmed)
--   key_no_trim   -> exactly 4 distinct hashes (whitespace is preserved, so the inputs differ)

with key_cardinality as (

    select
        count(distinct key_with_trim) as distinct_count_with_trim,
        count(distinct key_no_trim) as distinct_count_no_trim
    from {{ ref('test_generate_surrogate_key_trim') }}

)

select
    distinct_count_with_trim,
    distinct_count_no_trim
from key_cardinality
-- count() never yields NULL, so negating the passing condition is equivalent
-- to checking each count for inequality.
where not (
    distinct_count_with_trim = 1
    and distinct_count_no_trim = 4
)
20 changes: 13 additions & 7 deletions macros/sql/generate_surrogate_key.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{%- macro generate_surrogate_key(field_list) -%}
{{ return(adapter.dispatch('generate_surrogate_key', 'dbt_utils')(field_list)) }}
{%- macro generate_surrogate_key(field_list, trim=false) -%}
{{ return(adapter.dispatch('generate_surrogate_key', 'dbt_utils')(field_list, trim)) }}
{% endmacro %}

{%- macro default__generate_surrogate_key(field_list) -%}
{%- macro default__generate_surrogate_key(field_list, trim=false) -%}

{%- if var('surrogate_key_treat_nulls_as_empty_strings', False) -%}
{%- set default_null_value = "" -%}
Expand All @@ -14,9 +14,15 @@

{%- for field in field_list -%}

{%- do fields.append(
"coalesce(cast(" ~ field ~ " as " ~ dbt.type_string() ~ "), '" ~ default_null_value ~"')"
) -%}
{%- if trim -%}
{%- do fields.append(
"coalesce(trim(cast(" ~ field ~ " as " ~ dbt.type_string() ~ ")), '" ~ default_null_value ~"')"
) -%}
{%- else -%}
{%- do fields.append(
"coalesce(cast(" ~ field ~ " as " ~ dbt.type_string() ~ "), '" ~ default_null_value ~"')"
) -%}
{%- endif -%}

{%- if not loop.last %}
{%- do fields.append("'-'") -%}
Expand All @@ -26,4 +32,4 @@

{{ dbt.hash(dbt.concat(fields)) }}

{%- endmacro -%}
{%- endmacro -%}
Loading