Skip to content

Commit 42b7bbb

Browse files
feat(ingest): enable use_queries_v2 by default for snowflake/bigquery (#13601)
Co-authored-by: Sergio Gómez Villamor <[email protected]>
1 parent 7313f16 commit 42b7bbb

File tree

9 files changed

+13
-8
lines changed

9 files changed

+13
-8
lines changed

docs/how/updating-datahub.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
4242
- `DataHubGraph.parse_sql_lineage(default_dialect=...)` → `DataHubGraph.parse_sql_lineage(override_dialect=...)`
4343
- `LineageClient.add_lineage_via_sql(default_dialect=...)` → `LineageClient.add_lineage_via_sql(override_dialect=...)`
4444
- #14059: The `acryl-datahub-gx-plugin` now requires pydantic v2, which means the effective minimum supported version of GX is 0.17.15 (from Sept 2023).
45+
- #13601: The `use_queries_v2` flag is now enabled by default for Snowflake and BigQuery ingestion. This improves the quality of lineage and the quantity of queries extracted. To keep the previous behavior, explicitly set `use_queries_v2: false` in the source config.
4546

4647
### Known Issues
4748

metadata-ingestion/docs/sources/snowflake/snowflake_recipe.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
11
source:
22
type: snowflake
33
config:
4-
# This option is recommended to be used to ingest all lineage
4+
# Recommended for the first run, so that all available lineage is ingested.
55
ignore_start_time_lineage: true
66

7-
# This flag tells the snowflake ingestion to use the more advanced query parsing. This will become the default eventually.
8-
use_queries_v2: true
9-
107
# Coordinates
118
account_id: "abc48144"
129
warehouse: "COMPUTE_WH"

metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ class BigQueryV2Config(
342342
)
343343

344344
use_queries_v2: bool = Field(
345-
default=False,
345+
default=True,
346346
description="If enabled, uses the new queries extractor to extract queries from bigquery.",
347347
)
348348
include_queries: bool = Field(

metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ class SnowflakeV2Config(
236236
)
237237

238238
use_queries_v2: bool = Field(
239-
default=False,
239+
default=True,
240240
description="If enabled, uses the new queries extractor to extract queries from snowflake.",
241241
)
242242
include_queries: bool = Field(

metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ def test_bigquery_queries_v2_ingest(
457457
# if use_queries_v2 is set.
458458
pipeline_config_dict: Dict[str, Any] = recipe(
459459
mcp_output_path=mcp_output_path,
460-
source_config_override={"use_queries_v2": True, "include_table_lineage": False},
460+
source_config_override={"include_table_lineage": False},
461461
)
462462

463463
run_and_get_pipeline(pipeline_config_dict)
@@ -564,7 +564,6 @@ def test_bigquery_queries_v2_lineage_usage_ingest(
564564
pipeline_config_dict: Dict[str, Any] = recipe(
565565
mcp_output_path=mcp_output_path,
566566
source_config_override={
567-
"use_queries_v2": True,
568567
"include_schema_metadata": False,
569568
"include_table_lineage": True,
570569
"include_usage_statistics": True,

metadata-ingestion/tests/integration/snowflake/test_snowflake.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
125125
validate_upstreams_against_patterns=False,
126126
include_operational_stats=True,
127127
incremental_lineage=False,
128+
use_queries_v2=False,
128129
start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace(
129130
tzinfo=timezone.utc
130131
),
@@ -220,6 +221,7 @@ def test_snowflake_tags_as_structured_properties(
220221
password="TST_PWD",
221222
match_fully_qualified_names=True,
222223
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
224+
use_queries_v2=False,
223225
include_technical_schema=True,
224226
include_table_lineage=False,
225227
include_column_lineage=False,
@@ -286,6 +288,7 @@ def test_snowflake_private_link_and_incremental_mcps(
286288
include_views=True,
287289
include_usage_stats=False,
288290
format_sql_queries=True,
291+
use_queries_v2=False,
289292
incremental_lineage=False,
290293
incremental_properties=True,
291294
include_operational_stats=False,

metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def snowflake_pipeline_config(tmp_path):
5959
match_fully_qualified_names=True,
6060
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
6161
include_usage_stats=False,
62+
use_queries_v2=False,
6263
start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace(
6364
tzinfo=timezone.utc,
6465
),

metadata-ingestion/tests/integration/snowflake/test_snowflake_stateful.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def stateful_pipeline_config(include_tables: bool) -> PipelineConfig:
3232
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
3333
include_tables=include_tables,
3434
incremental_lineage=False,
35+
use_queries_v2=False,
3536
stateful_ingestion=StatefulStaleMetadataRemovalConfig.parse_obj(
3637
{
3738
"enabled": True,

metadata-ingestion/tests/integration/snowflake/test_snowflake_tag.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def test_snowflake_tag_pattern():
3333
include_column_lineage=False,
3434
include_usage_stats=False,
3535
include_operational_stats=False,
36+
use_queries_v2=False,
3637
extract_tags=TagOption.without_lineage,
3738
)
3839

@@ -76,6 +77,7 @@ def test_snowflake_tag_pattern_deny():
7677
include_column_lineage=False,
7778
include_usage_stats=False,
7879
include_operational_stats=False,
80+
use_queries_v2=False,
7981
extract_tags=TagOption.without_lineage,
8082
)
8183

@@ -116,6 +118,7 @@ def test_snowflake_structured_property_pattern_deny():
116118
schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]),
117119
extract_tags_as_structured_properties=True,
118120
structured_properties_template_cache_invalidation_interval=0,
121+
use_queries_v2=False,
119122
tag_pattern=AllowDenyPattern(
120123
deny=["TEST_DB.TEST_SCHEMA.my_tag_2:my_value_2"]
121124
),

0 commit comments

Comments
 (0)