Skip to content

Commit 208cb97

Browse files
authored
feat: maxcompute column level masking policy (#80)
feat: implement column-level masking policy instead of table-level
1 parent 034c01a commit 208cb97

File tree

7 files changed

+65
-53
lines changed

7 files changed

+65
-53
lines changed

plugins/extractors/maxcompute/client/client.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ import (
1111
"google.golang.org/protobuf/types/known/structpb"
1212
)
1313

14+
type (
15+
Column = string
16+
Policy = string
17+
)
18+
1419
type Client struct {
1520
client *odps.Odps
1621
project *odps.Project
@@ -121,22 +126,18 @@ func (c *Client) GetTablePreview(_ context.Context, partitionValue string, table
121126
return columnNames, protoList, nil
122127
}
123128

124-
func (*Client) GetMaskingPolicies(table *odps.Table) (maskingPolicies []string, err error) {
129+
func (*Client) GetMaskingPolicies(table *odps.Table) (maskingPolicies map[Column][]Policy, err error) {
125130
columnMaskInfos, err := table.ColumnMaskInfos()
126131
if err != nil {
127132
return nil, err
128133
}
129134

130-
policySet := make(map[string]struct{})
135+
maskingPolicies = make(map[string][]string)
131136
for _, columnMaskInfo := range columnMaskInfos {
132-
for _, policyName := range columnMaskInfo.PolicyNameList {
133-
policySet[policyName] = struct{}{}
137+
if len(columnMaskInfo.PolicyNameList) > 0 {
138+
maskingPolicies[columnMaskInfo.Name] = columnMaskInfo.PolicyNameList
134139
}
135140
}
136141

137-
for policyName := range policySet {
138-
maskingPolicies = append(maskingPolicies, policyName)
139-
}
140-
141142
return maskingPolicies, nil
142143
}

plugins/extractors/maxcompute/maxcompute.go

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,16 @@ import (
2727
)
2828

2929
const (
30-
maxcomputeService = "maxcompute"
30+
maxcomputeService = "maxcompute"
31+
attributesDataSQL = "sql"
32+
attributesDataMaskingPolicy = "masking_policy"
33+
attributesDataProjectName = "project_name"
34+
attributesDataSchema = "schema"
35+
attributesDataType = "type"
36+
attributesDataResourceURL = "resource_url"
37+
attributesDataPartitionFields = "partition_fields"
38+
attributesDataLabel = "label"
39+
attributesDataResourceType = "resource_type"
3140
)
3241

3342
type Extractor struct {
@@ -80,7 +89,7 @@ type Client interface {
8089
ListTable(ctx context.Context, schemaName string) ([]*odps.Table, error)
8190
GetTableSchema(ctx context.Context, table *odps.Table) (string, *tableschema.TableSchema, error)
8291
GetTablePreview(ctx context.Context, partitionValue string, table *odps.Table, maxRows int) ([]string, *structpb.ListValue, error)
83-
GetMaskingPolicies(table *odps.Table) ([]string, error)
92+
GetMaskingPolicies(table *odps.Table) (map[client.Column][]client.Policy, error)
8493
}
8594

8695
func New(logger log.Logger, clientFunc NewClientFunc, randFn randFn) *Extractor {
@@ -217,7 +226,7 @@ func (e *Extractor) buildAsset(ctx context.Context, schema *odps.Schema,
217226

218227
if tableType == config.TableTypeView {
219228
query := tableSchema.ViewText
220-
tableAttributesData["sql"] = query
229+
tableAttributesData[attributesDataSQL] = query
221230
if e.config.BuildViewLineage {
222231
upstreamResources := getUpstreamResources(query)
223232
asset.Lineage = &v1beta2.Lineage{
@@ -226,6 +235,11 @@ func (e *Extractor) buildAsset(ctx context.Context, schema *odps.Schema,
226235
}
227236
}
228237

238+
maskingPolicy, err := e.client.GetMaskingPolicies(table)
239+
if err != nil {
240+
e.logger.Warn("error getting masking policy", "error", err)
241+
}
242+
229243
var columns []*v1beta2.Column
230244
for i, col := range tableSchema.Columns {
231245
columnData := &v1beta2.Column{
@@ -236,6 +250,19 @@ func (e *Extractor) buildAsset(ctx context.Context, schema *odps.Schema,
236250
Attributes: utils.TryParseMapToProto(buildColumnAttributesData(&tableSchema.Columns[i])),
237251
Columns: buildColumns(col.Type),
238252
}
253+
254+
if policies, found := maskingPolicy[col.Name]; found {
255+
policyValues := make([]*structpb.Value, 0, len(policies))
256+
for _, policy := range policies {
257+
policyValues = append(policyValues, structpb.NewStringValue(policy))
258+
}
259+
columnData.Attributes.Fields[attributesDataMaskingPolicy] = &structpb.Value{
260+
Kind: &structpb.Value_ListValue{
261+
ListValue: &structpb.ListValue{Values: policyValues},
262+
},
263+
}
264+
}
265+
239266
columns = append(columns, columnData)
240267
}
241268

@@ -302,15 +329,15 @@ func buildColumns(dataType datatype.DataType) []*v1beta2.Column {
302329
func (e *Extractor) buildTableAttributesData(schemaName, tableType string, table *odps.Table, tableInfo *tableschema.TableSchema) map[string]interface{} {
303330
attributesData := map[string]interface{}{}
304331

305-
attributesData["project_name"] = e.config.ProjectName
306-
attributesData["schema"] = schemaName
307-
attributesData["type"] = tableType
332+
attributesData[attributesDataProjectName] = e.config.ProjectName
333+
attributesData[attributesDataSchema] = schemaName
334+
attributesData[attributesDataType] = tableType
308335

309336
rb := common.ResourceBuilder{ProjectName: e.config.ProjectName}
310-
attributesData["resource_url"] = rb.Table(schemaName, tableInfo.TableName)
337+
attributesData[attributesDataResourceURL] = rb.Table(schemaName, tableInfo.TableName)
311338

312339
if tableInfo.ViewText != "" {
313-
attributesData["sql"] = tableInfo.ViewText
340+
attributesData[attributesDataSQL] = tableInfo.ViewText
314341
}
315342

316343
var partitionNames []interface{}
@@ -319,18 +346,8 @@ func (e *Extractor) buildTableAttributesData(schemaName, tableType string, table
319346
for i, column := range tableInfo.PartitionColumns {
320347
partitionNames[i] = column.Name
321348
}
322-
attributesData["partition_fields"] = partitionNames
323-
}
324-
325-
maskingPolicy, err := e.client.GetMaskingPolicies(table)
326-
if err != nil {
327-
e.logger.Warn("error getting masking policy", "error", err)
328-
}
329-
maskingPolicyInterface := make([]interface{}, len(maskingPolicy))
330-
for i, policy := range maskingPolicy {
331-
maskingPolicyInterface[i] = policy
349+
attributesData[attributesDataPartitionFields] = partitionNames
332350
}
333-
attributesData["masking_policy"] = maskingPolicyInterface
334351

335352
return attributesData
336353
}
@@ -343,7 +360,7 @@ func buildColumnAttributesData(column *tableschema.Column) map[string]interface{
343360
}
344361

345362
if column.Label != "" {
346-
attributesData["label"] = column.Label
363+
attributesData[attributesDataLabel] = column.Label
347364
}
348365

349366
return attributesData

plugins/extractors/maxcompute/maxcompute_test.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,10 @@ func TestExtract(t *testing.T) {
216216
},
217217
nil,
218218
)
219-
mockClient.EXPECT().GetMaskingPolicies(mock.Anything).Return([]string{"policyTag1", "policyTag2", "policyTag3"}, nil)
219+
mockClient.EXPECT().GetMaskingPolicies(mock.Anything).Return(map[string][]string{
220+
"user_id": {"policyTag1"},
221+
"email": {"policyTag2", "policyTag3"},
222+
}, nil)
220223
}, nil)
221224

222225
assert.Nil(t, err)
@@ -267,7 +270,7 @@ func TestExtract(t *testing.T) {
267270
},
268271
nil,
269272
)
270-
mockClient.EXPECT().GetMaskingPolicies(mock.Anything).Return([]string{}, nil)
273+
mockClient.EXPECT().GetMaskingPolicies(mock.Anything).Return(map[string][]string{}, nil)
271274
}, nil)
272275

273276
assert.Nil(t, err)
@@ -293,7 +296,7 @@ func TestExtract(t *testing.T) {
293296
mockClient.EXPECT().ListSchema(mock.Anything).Return(schema1, nil)
294297
mockClient.EXPECT().ListTable(mock.Anything, "my_schema").Return(table1[1:], nil)
295298
mockClient.EXPECT().GetTableSchema(mock.Anything, table1[1]).Return("MANAGED_TABLE", schemaMapping[table1[1].Name()], nil)
296-
mockClient.EXPECT().GetMaskingPolicies(mock.Anything).Return([]string{}, nil)
299+
mockClient.EXPECT().GetMaskingPolicies(mock.Anything).Return(map[string][]string{}, nil)
297300
}, nil)
298301

299302
assert.Nil(t, err)

plugins/extractors/maxcompute/mocks/maxcompute_client_mock.go

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

plugins/extractors/maxcompute/testdata/expected-assets-with-table-exclusion.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
"preview_fields": [],
3535
"preview_rows": null,
3636
"attributes": {
37-
"masking_policy": [],
3837
"project_name": "test-project-id",
3938
"resource_url": "/projects/test-project-id/schemas/my_schema/tables/new_table",
4039
"schema": "my_schema",

plugins/extractors/maxcompute/testdata/expected-assets-with-view-lineage.json

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@
5555
"preview_fields": [],
5656
"preview_rows": null,
5757
"attributes": {
58-
"masking_policy": [],
5958
"project_name": "test-project-id",
6059
"resource_url": "/projects/test-project-id/schemas/my_schema/tables/dummy_table",
6160
"schema": "my_schema",
@@ -129,7 +128,6 @@
129128
]
130129
],
131130
"attributes": {
132-
"masking_policy": [],
133131
"project_name": "test-project-id",
134132
"resource_url": "/projects/test-project-id/schemas/my_schema/tables/new_table",
135133
"schema": "my_schema",

plugins/extractors/maxcompute/testdata/expected-assets.json

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,6 @@
5555
"preview_fields": [],
5656
"preview_rows": null,
5757
"attributes": {
58-
"masking_policy": [
59-
"policyTag1",
60-
"policyTag2",
61-
"policyTag3"
62-
],
6358
"project_name": "test-project-id",
6459
"resource_url": "/projects/test-project-id/schemas/my_schema/tables/dummy_table",
6560
"schema": "my_schema",
@@ -95,7 +90,9 @@
9590
"length": "0",
9691
"profile": null,
9792
"columns": [],
98-
"attributes": {}
93+
"attributes": {
94+
"masking_policy": ["policyTag1"]
95+
}
9996
},
10097
{
10198
"name": "email",
@@ -105,7 +102,9 @@
105102
"length": "0",
106103
"profile": null,
107104
"columns": [],
108-
"attributes": {}
105+
"attributes": {
106+
"masking_policy": ["policyTag2", "policyTag3"]
107+
}
109108
}
110109
],
111110
"preview_fields": [
@@ -123,11 +122,6 @@
123122
]
124123
],
125124
"attributes": {
126-
"masking_policy": [
127-
"policyTag1",
128-
"policyTag2",
129-
"policyTag3"
130-
],
131125
"project_name": "test-project-id",
132126
"resource_url": "/projects/test-project-id/schemas/my_schema/tables/new_table",
133127
"schema": "my_schema",

0 commit comments

Comments
 (0)