Skip to content

Commit e00dab2

Browse files
betodealmeida and claude committed
feat(semantic-layer): add core semantic layer infrastructure
Add the foundational semantic layer implementation:

- SemanticLayer and SemanticView SQLAlchemy models
- Semantic layer registry for plugin-based implementations
- Query mapper for translating Superset queries to semantic layer format
- Type definitions for metrics, dimensions, entities, and grains
- DAO layer for semantic layer CRUD operations
- Database migration for semantic_layers and semantic_views tables
- Updated Explorable base class with ColumnMetadata protocol
- TypedDict updates for API response compatibility
- Update sql_lab and sqla models for new TypedDict fields

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 91d018c commit e00dab2

File tree

13 files changed

+2445
-36
lines changed

13 files changed

+2445
-36
lines changed

superset/connectors/sqla/models.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@
107107
from superset.superset_typing import (
108108
AdhocColumn,
109109
AdhocMetric,
110+
DatasetColumnData,
111+
DatasetMetricData,
110112
ExplorableData,
111113
Metric,
112114
QueryObjectDict,
@@ -463,8 +465,8 @@ def data(self) -> ExplorableData:
463465
# sqla-specific
464466
"sql": self.sql,
465467
# one to many
466-
"columns": [o.data for o in self.columns],
467-
"metrics": [o.data for o in self.metrics],
468+
"columns": cast(list[DatasetColumnData], [o.data for o in self.columns]),
469+
"metrics": cast(list[DatasetMetricData], [o.data for o in self.metrics]),
468470
"folders": self.folders,
469471
# TODO deprecate, move logic to JS
470472
"order_by_choices": self.order_by_choices,

superset/daos/semantic_layer.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
"""DAOs for semantic layer models."""
19+
20+
from __future__ import annotations
21+
22+
from superset.daos.base import BaseDAO
23+
from superset.extensions import db
24+
from superset.semantic_layers.models import SemanticLayer, SemanticView
25+
26+
27+
class SemanticLayerDAO(BaseDAO[SemanticLayer]):
    """Name checks and lookups for ``SemanticLayer`` rows."""

    @staticmethod
    def validate_uniqueness(name: str) -> bool:
        """
        Check that no semantic layer already uses the given name.

        :param name: Candidate semantic layer name
        :return: True when no layer with this name exists, False otherwise
        """
        same_name = db.session.query(SemanticLayer).filter(
            SemanticLayer.name == name
        )
        name_taken = db.session.query(same_name.exists()).scalar()
        return not name_taken

    @staticmethod
    def validate_update_uniqueness(layer_uuid: str, name: str) -> bool:
        """
        Check that renaming a layer would not collide with another layer.

        The layer identified by ``layer_uuid`` is excluded from the check, so
        keeping its current name is not reported as a conflict.

        :param layer_uuid: UUID of the semantic layer being updated
        :param name: New name to validate
        :return: True when no *other* layer has this name, False otherwise
        """
        other_layers_with_name = db.session.query(SemanticLayer).filter(
            SemanticLayer.name == name,
            SemanticLayer.uuid != layer_uuid,
        )
        name_taken = db.session.query(other_layers_with_name.exists()).scalar()
        return not name_taken

    @staticmethod
    def find_by_name(name: str) -> SemanticLayer | None:
        """
        Look up a semantic layer by its name.

        :param name: Semantic layer name
        :return: The matching SemanticLayer, or None when there is no match
        """
        layers = db.session.query(SemanticLayer)
        return layers.filter(SemanticLayer.name == name).one_or_none()

    @classmethod
    def get_semantic_views(cls, layer_uuid: str) -> list[SemanticView]:
        """
        List every semantic view attached to a semantic layer.

        :param layer_uuid: UUID of the semantic layer
        :return: List of SemanticView instances (empty when there are none)
        """
        views = db.session.query(SemanticView)
        return views.filter(SemanticView.semantic_layer_uuid == layer_uuid).all()
85+
86+
87+
class SemanticViewDAO(BaseDAO[SemanticView]):
    """Name checks and lookups for ``SemanticView`` rows."""

    @staticmethod
    def find_by_semantic_layer(layer_uuid: str) -> list[SemanticView]:
        """
        List every view that belongs to a semantic layer.

        :param layer_uuid: UUID of the semantic layer
        :return: List of SemanticView instances (empty when there are none)
        """
        views = db.session.query(SemanticView)
        return views.filter(SemanticView.semantic_layer_uuid == layer_uuid).all()

    @staticmethod
    def validate_uniqueness(name: str, layer_uuid: str) -> bool:
        """
        Check that a view name is not yet used inside a semantic layer.

        :param name: Candidate view name
        :param layer_uuid: UUID of the semantic layer
        :return: True when the layer has no view with this name, False otherwise
        """
        same_name_in_layer = db.session.query(SemanticView).filter(
            SemanticView.name == name,
            SemanticView.semantic_layer_uuid == layer_uuid,
        )
        name_taken = db.session.query(same_name_in_layer.exists()).scalar()
        return not name_taken

    @staticmethod
    def validate_update_uniqueness(
        view_uuid: str,
        name: str,
        layer_uuid: str,
    ) -> bool:
        """
        Check that renaming a view would not collide with a sibling view.

        The view identified by ``view_uuid`` is excluded from the check, so
        keeping its current name is not reported as a conflict.

        :param view_uuid: UUID of the view being updated
        :param name: New name to validate
        :param layer_uuid: UUID of the semantic layer
        :return: True when no *other* view in the layer has this name,
            False otherwise
        """
        sibling_views_with_name = db.session.query(SemanticView).filter(
            SemanticView.name == name,
            SemanticView.semantic_layer_uuid == layer_uuid,
            SemanticView.uuid != view_uuid,
        )
        name_taken = db.session.query(sibling_views_with_name.exists()).scalar()
        return not name_taken

    @staticmethod
    def find_by_name(name: str, layer_uuid: str) -> SemanticView | None:
        """
        Look up a view by name within a single semantic layer.

        :param name: View name
        :param layer_uuid: UUID of the semantic layer
        :return: The matching SemanticView, or None when there is no match
        """
        views_in_layer = db.session.query(SemanticView).filter(
            SemanticView.name == name,
            SemanticView.semantic_layer_uuid == layer_uuid,
        )
        return views_in_layer.one_or_none()

superset/explorables/base.py

Lines changed: 126 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,130 @@ class TimeGrainDict(TypedDict):
5353
duration: str | None
5454

5555

56+
@runtime_checkable
class MetricMetadata(Protocol):
    """
    Structural type for a metric exposed by an explorable data source.

    A metric carries a SQL expression or a reference to a semantic layer
    measure, plus the presentation and certification details listed below.
    Every member is a read-only property.

    Attributes:
        metric_name: Unique identifier for the metric
        expression: SQL expression or reference for calculating the metric
        verbose_name: Human-readable name for display in the UI
        description: Description of what the metric represents
        d3format: D3 format string for formatting numeric values
        currency: Currency configuration for the metric (JSON object)
        warning_text: Warning message to display when using this metric
        certified_by: Person or entity that certified this metric
        certification_details: Details about the certification
    """

    @property
    def metric_name(self) -> str:
        """Return the unique identifier for the metric."""
        ...

    @property
    def expression(self) -> str:
        """Return the SQL expression or reference used to calculate the metric."""
        ...

    @property
    def verbose_name(self) -> str | None:
        """Return the human-readable name shown in the UI, if any."""
        ...

    @property
    def description(self) -> str | None:
        """Return the description of what the metric represents, if any."""
        ...

    @property
    def d3format(self) -> str | None:
        """Return the D3 format string for numeric values, if any."""
        ...

    @property
    def currency(self) -> dict[str, Any] | None:
        """Return the currency configuration (JSON object), if any."""
        ...

    @property
    def warning_text(self) -> str | None:
        """Return the warning message to show when the metric is used, if any."""
        ...

    @property
    def certified_by(self) -> str | None:
        """Return the person or entity that certified the metric, if any."""
        ...

    @property
    def certification_details(self) -> str | None:
        """Return details about the certification, if any."""
        ...
111+
112+
113+
@runtime_checkable
class ColumnMetadata(Protocol):
    """
    Structural type for a column/dimension exposed by an explorable data source.

    Columns are what charts group by and filter on. Every member is a
    read-only property.

    Attributes:
        column_name: Unique identifier for the column
        type: SQL data type of the column (e.g., 'VARCHAR', 'INTEGER', 'DATETIME')
        is_dttm: Whether this column represents a date or time value
        verbose_name: Human-readable name for display in the UI
        description: Description of what the column represents
        groupby: Whether this column is allowed for grouping/aggregation
        filterable: Whether this column can be used in filters
        expression: SQL expression if this is a calculated column
        python_date_format: Python datetime format string for temporal columns
        advanced_data_type: Advanced data type classification
        extra: Additional metadata stored as JSON
    """

    @property
    def column_name(self) -> str:
        """Return the unique identifier for the column."""
        ...

    @property
    def type(self) -> str:
        """Return the SQL data type of the column."""
        ...

    @property
    def is_dttm(self) -> bool:
        """Return whether the column represents a date or time value."""
        ...

    @property
    def verbose_name(self) -> str | None:
        """Return the human-readable name shown in the UI, if any."""
        ...

    @property
    def description(self) -> str | None:
        """Return the description of what the column represents, if any."""
        ...

    @property
    def groupby(self) -> bool:
        """Return whether the column is allowed for grouping/aggregation."""
        ...

    @property
    def filterable(self) -> bool:
        """Return whether the column can be used in filters."""
        ...

    @property
    def expression(self) -> str | None:
        """Return the SQL expression when this is a calculated column."""
        ...

    @property
    def python_date_format(self) -> str | None:
        """Return the Python datetime format string for temporal columns."""
        ...

    @property
    def advanced_data_type(self) -> str | None:
        """Return the advanced data type classification, if any."""
        ...

    @property
    def extra(self) -> str | None:
        """Return additional metadata stored as JSON, if any."""
        ...
178+
179+
56180
@runtime_checkable
57181
class Explorable(Protocol):
58182
"""
@@ -132,7 +256,7 @@ def type(self) -> str:
132256
"""
133257

134258
@property
135-
def metrics(self) -> list[Any]:
259+
def metrics(self) -> list[MetricMetadata]:
136260
"""
137261
List of metric metadata objects.
138262
@@ -147,7 +271,7 @@ def metrics(self) -> list[Any]:
147271

148272
# TODO: rename to dimensions
149273
@property
150-
def columns(self) -> list[Any]:
274+
def columns(self) -> list[ColumnMetadata]:
151275
"""
152276
List of column metadata objects.
153277

0 commit comments

Comments
 (0)