Skip to content

Commit b69e656

Browse files
authored
feat(artifact): add admin APIs for KB consolidation migration (#331)
**Because** - The KB consolidation migration (convert000070) needs to move files from legacy/duplicate KBs to the primary default KB - Files can belong to multiple KBs (many-to-many relationship via junction table) - After consolidating files, legacy KBs need to be hard-deleted (bypassing soft delete) with CASCADE removal of file-KB associations - The previous `easimon/maximize-build-space` action was deprecated **This commit** - Adds `AddFilesToKnowledgeBaseAdmin` handler that accepts file resource names (`namespaces/{namespace}/files/{file}`), resolves file IDs to UIDs internally, and inserts into junction table (skipping duplicates via `ON CONFLICT DO NOTHING`) - Adds `DeleteKnowledgeBaseAdmin` handler that drops Milvus collection, purges ACL entry, and hard-deletes KB record (CASCADE removes file_knowledge_base associations) - Adds `HardDeleteKnowledgeBase` repository method using `Unscoped().Delete()` to bypass soft delete - Adds `AddFilesToKnowledgeBase` repository method with raw SQL to lookup file UIDs by IDs and insert associations - Updates integration-test workflow to use `jlumbroso/free-disk-space` action - Regenerates repository mocks with new methods
1 parent 77fdc0d commit b69e656

File tree

7 files changed

+920
-12
lines changed

7 files changed

+920
-12
lines changed

.github/workflows/integration-test.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,16 @@ jobs:
1313
name: Integration test
1414
runs-on: ubuntu-latest
1515
steps:
16-
- name: Maximize build space
17-
uses: easimon/maximize-build-space@master
16+
- name: Free disk space
17+
uses: jlumbroso/free-disk-space@main
1818
with:
19-
root-reserve-mb: 5120
20-
remove-dotnet: "true"
21-
remove-android: "true"
22-
remove-haskell: "true"
23-
remove-codeql: "true"
24-
remove-docker-images: "true"
25-
build-mount-path: "/var/lib/docker"
19+
tool-cache: false
20+
android: true
21+
dotnet: true
22+
haskell: true
23+
large-packages: true
24+
docker-images: true
25+
swap-storage: false
2626

2727
- name: Restart docker
2828
run: sudo service docker restart

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ require (
1313
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0
1414
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1
1515
github.com/iancoleman/strcase v0.3.0
16-
github.com/instill-ai/protogen-go v0.3.3-alpha.0.20260201062717-96c489b7a2ba
16+
github.com/instill-ai/protogen-go v0.3.3-alpha.0.20260201213435-984ba4ebc92e
1717
github.com/instill-ai/x v0.10.1-alpha.0.20260129195415-09e1680f104d
1818
github.com/knadh/koanf v1.5.0
1919
github.com/mennanov/fieldmask-utils v1.1.2

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -419,8 +419,8 @@ github.com/iancoleman/strcase v0.3.0 h1:nTXanmYxhfFAMjZL34Ov6gkzEsSJZ5DbhxWjvSAS
419419
github.com/iancoleman/strcase v0.3.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho=
420420
github.com/imkira/go-interpol v1.1.0/go.mod h1:z0h2/2T3XF8kyEPpRgJ3kmNv+C43p+I/CoI+jC3w2iA=
421421
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
422-
github.com/instill-ai/protogen-go v0.3.3-alpha.0.20260201062717-96c489b7a2ba h1:H6YsrxOBckOZD8gp7yzXt4DrQi5b3XZBcmrt7wj4DMI=
423-
github.com/instill-ai/protogen-go v0.3.3-alpha.0.20260201062717-96c489b7a2ba/go.mod h1:bCnBosofpaUxKBuTTJM3/I3thAK37kvfBnKByjnLsl4=
422+
github.com/instill-ai/protogen-go v0.3.3-alpha.0.20260201213435-984ba4ebc92e h1:xXUG15PsU2sU1BFJmAvuY25HNCA7Xa0gs4CjG1oPSzQ=
423+
github.com/instill-ai/protogen-go v0.3.3-alpha.0.20260201213435-984ba4ebc92e/go.mod h1:bCnBosofpaUxKBuTTJM3/I3thAK37kvfBnKByjnLsl4=
424424
github.com/instill-ai/x v0.10.1-alpha.0.20260129195415-09e1680f104d h1:FLDqMpALmq2e/j9inTGWrY4mhRFhe5/xXvjjxwQDqDg=
425425
github.com/instill-ai/x v0.10.1-alpha.0.20260129195415-09e1680f104d/go.mod h1:/xG0LMh43kyyK9jXGtV/JGd5p9GVcreSZ6GRGVbALEU=
426426
github.com/iris-contrib/blackfriday v2.0.0+incompatible/go.mod h1:UzZ2bDEoaSGPbkg6SAB4att1aAwTmVIx/5gCVqeyUdI=

pkg/handler/private.go

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,3 +1049,119 @@ func (h *PrivateHandler) ResetKnowledgeBaseEmbeddingsAdmin(ctx context.Context,
10491049
FilesToReembed: int32(fileCount),
10501050
}, nil
10511051
}
1052+
1053+
// AddFilesToKnowledgeBaseAdmin adds file associations to a target KB by file resource names (admin only).
1054+
// Files can belong to multiple KBs (many-to-many relationship).
1055+
func (h *PrivateHandler) AddFilesToKnowledgeBaseAdmin(ctx context.Context, req *artifactpb.AddFilesToKnowledgeBaseAdminRequest) (*artifactpb.AddFilesToKnowledgeBaseAdminResponse, error) {
1056+
logger, _ := logx.GetZapLogger(ctx)
1057+
1058+
// Parse target KB from name (format: namespaces/{namespace}/knowledge-bases/{kb})
1059+
targetNamespaceID, targetKBID, err := parseKnowledgeBaseFromName(req.GetTargetKnowledgeBase())
1060+
if err != nil {
1061+
return nil, status.Errorf(codes.InvalidArgument, "invalid target_knowledge_base format: %v", err)
1062+
}
1063+
1064+
logger.Info("AddFilesToKnowledgeBaseAdmin called",
1065+
zap.String("target_namespace_id", targetNamespaceID),
1066+
zap.String("target_kb_id", targetKBID),
1067+
zap.Int("file_count", len(req.GetFiles())))
1068+
1069+
// Get target namespace
1070+
targetNs, err := h.service.GetNamespaceByNsID(ctx, targetNamespaceID)
1071+
if err != nil {
1072+
logger.Error("failed to get target namespace", zap.Error(err))
1073+
return nil, status.Errorf(codes.NotFound, "target namespace not found: %v", err)
1074+
}
1075+
1076+
// Get target knowledge base
1077+
targetKB, err := h.service.Repository().GetKnowledgeBaseByOwnerAndKbID(ctx, targetNs.NsUID, targetKBID)
1078+
if err != nil {
1079+
logger.Error("failed to get target knowledge base", zap.Error(err))
1080+
return nil, status.Errorf(codes.NotFound, "target knowledge base not found: %v", err)
1081+
}
1082+
1083+
// Parse file resource names to extract file IDs
1084+
// Format: namespaces/{namespace}/files/{file}
1085+
fileIDs := make([]string, 0, len(req.GetFiles()))
1086+
for _, fileName := range req.GetFiles() {
1087+
_, _, fileID, err := parseFileFromName(fileName)
1088+
if err != nil {
1089+
return nil, status.Errorf(codes.InvalidArgument, "invalid file resource name format: %s", fileName)
1090+
}
1091+
fileIDs = append(fileIDs, fileID)
1092+
}
1093+
1094+
// Add file associations
1095+
filesAdded, err := h.service.Repository().AddFilesToKnowledgeBase(ctx, types.KnowledgeBaseUIDType(targetKB.UID), fileIDs)
1096+
if err != nil {
1097+
logger.Error("failed to add files to knowledge base", zap.Error(err))
1098+
return nil, status.Errorf(codes.Internal, "failed to add files: %v", err)
1099+
}
1100+
1101+
logger.Info("AddFilesToKnowledgeBaseAdmin completed",
1102+
zap.String("target_kb_uid", targetKB.UID.String()),
1103+
zap.Int64("files_added", filesAdded))
1104+
1105+
return &artifactpb.AddFilesToKnowledgeBaseAdminResponse{
1106+
FilesAdded: int32(filesAdded),
1107+
}, nil
1108+
}
1109+
1110+
// DeleteKnowledgeBaseAdmin force-deletes a knowledge base (admin only).
1111+
// CASCADE removes file-KB associations from junction table. Files remain orphaned.
1112+
// Also cleans up the Milvus collection.
1113+
func (h *PrivateHandler) DeleteKnowledgeBaseAdmin(ctx context.Context, req *artifactpb.DeleteKnowledgeBaseAdminRequest) (*artifactpb.DeleteKnowledgeBaseAdminResponse, error) {
1114+
logger, _ := logx.GetZapLogger(ctx)
1115+
1116+
// Parse namespace and KB ID from name (format: namespaces/{namespace}/knowledge-bases/{kb})
1117+
namespaceID, knowledgeBaseID, err := parseKnowledgeBaseFromName(req.GetName())
1118+
if err != nil {
1119+
return nil, status.Errorf(codes.InvalidArgument, "invalid name format: %v", err)
1120+
}
1121+
1122+
logger.Info("DeleteKnowledgeBaseAdmin called",
1123+
zap.String("namespace_id", namespaceID),
1124+
zap.String("knowledge_base_id", knowledgeBaseID))
1125+
1126+
// Get namespace
1127+
ns, err := h.service.GetNamespaceByNsID(ctx, namespaceID)
1128+
if err != nil {
1129+
logger.Error("failed to get namespace", zap.Error(err))
1130+
return nil, status.Errorf(codes.NotFound, "namespace not found: %v", err)
1131+
}
1132+
1133+
// Get knowledge base
1134+
kb, err := h.service.Repository().GetKnowledgeBaseByOwnerAndKbID(ctx, ns.NsUID, knowledgeBaseID)
1135+
if err != nil {
1136+
logger.Error("failed to get knowledge base", zap.Error(err))
1137+
return nil, status.Errorf(codes.NotFound, "knowledge base not found: %v", err)
1138+
}
1139+
1140+
collectionName := constant.KBCollectionName(kb.ActiveCollectionUID)
1141+
1142+
// Step 1: Drop Milvus collection if it exists
1143+
if err := h.service.Repository().DropCollection(ctx, collectionName); err != nil {
1144+
// Log but continue - collection might not exist
1145+
logger.Warn("Failed to drop Milvus collection (may not exist)", zap.Error(err), zap.String("collection", collectionName))
1146+
} else {
1147+
logger.Info("Dropped Milvus collection", zap.String("collection", collectionName))
1148+
}
1149+
1150+
// Step 2: Remove ACL entry
1151+
if err := h.service.ACLClient().Purge(ctx, "knowledgebase", kb.UID); err != nil {
1152+
// Log but continue - ACL entry might not exist
1153+
logger.Warn("Failed to purge ACL entry", zap.Error(err))
1154+
}
1155+
1156+
// Step 3: Hard delete the knowledge base (CASCADE will remove file_knowledge_base associations)
1157+
if err := h.service.Repository().HardDeleteKnowledgeBase(ctx, kb.UID.String()); err != nil {
1158+
logger.Error("failed to delete knowledge base", zap.Error(err))
1159+
return nil, status.Errorf(codes.Internal, "failed to delete knowledge base: %v", err)
1160+
}
1161+
1162+
logger.Info("DeleteKnowledgeBaseAdmin completed",
1163+
zap.String("kb_uid", kb.UID.String()),
1164+
zap.String("kb_id", kb.ID))
1165+
1166+
return &artifactpb.DeleteKnowledgeBaseAdminResponse{}, nil
1167+
}

pkg/repository/file.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,11 @@ type File interface {
9696
// Returns the number of files affected.
9797
ResetFileStatusesByKBUID(ctx context.Context, kbUID types.KBUIDType) (int64, error)
9898

99+
// AddFilesToKnowledgeBase adds file associations to a target KB by file IDs.
100+
// Files can belong to multiple KBs (many-to-many relationship).
101+
// Returns the number of new associations added (skips duplicates).
102+
AddFilesToKnowledgeBase(ctx context.Context, targetKBUID types.KnowledgeBaseUIDType, fileIDs []string) (int64, error)
103+
99104
// Deprecated methods
100105

101106
// GetFileByKBUIDAndFileID returns the file by
@@ -643,6 +648,33 @@ func (r *repository) ResetFileStatusesByKBUID(ctx context.Context, kbUID types.K
643648
return result.RowsAffected, nil
644649
}
645650

651+
// AddFilesToKnowledgeBase adds file associations to a target KB by file IDs.
652+
// Files can belong to multiple KBs (many-to-many relationship).
653+
// Returns the number of new associations added (skips duplicates).
654+
func (r *repository) AddFilesToKnowledgeBase(ctx context.Context, targetKBUID types.KnowledgeBaseUIDType, fileIDs []string) (int64, error) {
655+
if len(fileIDs) == 0 {
656+
return 0, nil
657+
}
658+
659+
// Use raw SQL to:
660+
// 1. Look up file UIDs from the file table by their IDs
661+
// 2. Insert into junction table, skipping duplicates
662+
sql := `
663+
INSERT INTO file_knowledge_base (file_uid, kb_uid, created_at)
664+
SELECT f.uid, ?, NOW()
665+
FROM file f
666+
WHERE f.id = ANY(?)
667+
ON CONFLICT (file_uid, kb_uid) DO NOTHING
668+
`
669+
670+
result := r.db.WithContext(ctx).Exec(sql, targetKBUID, fileIDs)
671+
if result.Error != nil {
672+
return 0, fmt.Errorf("failed to add files to knowledge base: %w", result.Error)
673+
}
674+
675+
return result.RowsAffected, nil
676+
}
677+
646678
// ProcessFiles updates the process status of the files
647679
func (r *repository) ProcessFiles(
648680
ctx context.Context,

pkg/repository/knowledge_base.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ type KnowledgeBase interface {
100100
// DeleteKnowledgeBaseTx soft-deletes a KB within a transaction
101101
DeleteKnowledgeBaseTx(ctx context.Context, tx *gorm.DB, owner, kbID string) error
102102

103+
// HardDeleteKnowledgeBase permanently deletes a KB and CASCADE removes file_knowledge_base associations
104+
// Used by admin consolidation operations to remove duplicate KBs after moving files
105+
HardDeleteKnowledgeBase(ctx context.Context, kbUID string) error
106+
103107
// UpdateKnowledgeBaseResources updates kb_uid references in all resource tables
104108
// This is critical for atomic swap to ensure resources follow their knowledge bases
105109
UpdateKnowledgeBaseResources(ctx context.Context, fromKBUID, toKBUID types.KBUIDType) error
@@ -589,6 +593,20 @@ func (r *repository) DeleteKnowledgeBaseTx(ctx context.Context, tx *gorm.DB, own
589593
return nil
590594
}
591595

596+
// HardDeleteKnowledgeBase permanently deletes a KB and CASCADE removes file_knowledge_base associations
597+
// Used by admin consolidation operations to remove duplicate KBs after moving files
598+
func (r *repository) HardDeleteKnowledgeBase(ctx context.Context, kbUID string) error {
599+
// Use Unscoped to bypass soft delete and perform actual DELETE
600+
result := r.db.WithContext(ctx).Unscoped().Delete(&KnowledgeBaseModel{}, "uid = ?", kbUID)
601+
if result.Error != nil {
602+
return fmt.Errorf("hard deleting knowledge base: %w", result.Error)
603+
}
604+
if result.RowsAffected == 0 {
605+
return fmt.Errorf("knowledge base not found: %s", kbUID)
606+
}
607+
return nil
608+
}
609+
592610
// UpdateKBUIDInResources updates kb_uid in all resource tables (files, chunks, embeddings, converted_files)
593611
// This is CRITICAL for atomic swap: when KBs are swapped, all resources must follow their KBs.
594612
// Without this, queries will fail because resources point to old KB UIDs.

0 commit comments

Comments
 (0)