Skip to content

Commit ed9e1ed

Browse files
committed
feat: add batch export and batch export zip endpoints for documents
1 parent 668d485 commit ed9e1ed

File tree

8 files changed

+343
-4
lines changed

8 files changed

+343
-4
lines changed

apps/knowledge/serializers/document.py

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@
2525
from xlwt import Utils
2626

2727
from common.db.search import native_search, get_dynamics_model, native_page_search
28-
from common.event.listener_manage import ListenerManagement
2928
from common.event.common import work_thread_pool
29+
from common.event.listener_manage import ListenerManagement
3030
from common.exception.app_exception import AppApiException
3131
from common.field.common import UploadedFileField
3232
from common.handle.impl.qa.csv_parse_qa_handle import CsvParseQAHandle
@@ -1332,6 +1332,69 @@ def batch_add_tag(self, instance: Dict, with_valid=True):
13321332
if new_relations:
13331333
QuerySet(DocumentTag).bulk_create(new_relations)
13341334

1335+
1336+
def batch_export(self, instance: Dict, with_valid=True):
    """Export the selected documents as a single workbook download.

    :param instance: dict carrying the selected document ids under ``id_list``.
    :param with_valid: when true, validate ``instance`` with ``BatchSerializer``
        and then validate this serializer before any query runs.
    :return: an ``HttpResponse`` with the workbook written into its body and an
        attachment ``Content-Disposition`` header.
    """
    if with_valid:
        BatchSerializer(data=instance).is_valid(model=Document, raise_exception=True)
        self.is_valid(raise_exception=True)
    _, data_dict, document_dict = self._collect_export_data(instance.get("id_list"))
    workbook = DocumentSerializers.Operate.get_workbook(data_dict, document_dict)
    # NOTE(review): the filename ends in .xlsx while the content type is the
    # legacy Excel one - confirm which format get_workbook actually produces.
    response = HttpResponse(content_type='application/vnd.ms-excel')
    response['Content-Disposition'] = 'attachment; filename="knowledge.xlsx"'
    workbook.save(response)
    return response

def batch_export_zip(self, instance: Dict, with_valid=True):
    """Export the selected documents as a zip archive download.

    The archive is built from a temporary directory that receives the workbook
    (``knowledge.xlsx``) plus whatever ``write_image`` writes for each parsed
    paragraph content.

    :param instance: dict carrying the selected document ids under ``id_list``.
    :param with_valid: when true, validate ``instance`` with ``BatchSerializer``
        and then validate this serializer before any query runs.
    :return: an ``HttpResponse`` whose body is the zip archive bytes.
    """
    if with_valid:
        BatchSerializer(data=instance).is_valid(model=Document, raise_exception=True)
        self.is_valid(raise_exception=True)
    paragraph_list, data_dict, document_dict = self._collect_export_data(instance.get("id_list"))
    # Parsed once per paragraph content; each result is handed to write_image below.
    res = [parse_image(paragraph.get('content')) for paragraph in paragraph_list]

    workbook = DocumentSerializers.Operate.get_workbook(data_dict, document_dict)
    response = HttpResponse(content_type='application/zip')
    response['Content-Disposition'] = 'attachment; filename="knowledge.zip"'
    zip_buffer = io.BytesIO()
    # The temporary directory is removed on exit, so the archive is assembled
    # in memory before the context manager closes.
    with TemporaryDirectory() as tempdir:
        knowledge_file = os.path.join(tempdir, 'knowledge.xlsx')
        workbook.save(knowledge_file)
        for r in res:
            write_image(tempdir, r)
        zip_dir(tempdir, zip_buffer)
    response.write(zip_buffer.getvalue())
    return response

@staticmethod
def _collect_export_data(document_ids):
    """Load and merge the rows that both batch export formats are built from.

    :param document_ids: ids of the documents to export.
    :return: ``(paragraph_list, data_dict, document_dict)`` where the last two
        are the values returned by ``DocumentSerializers.Operate.merge_problem``.
    """
    # NOTE(review): the queries below filter by document id only; confirm that
    # ids belonging to another knowledge base are rejected before this point.
    document_list = QuerySet(Document).filter(id__in=document_ids)
    paragraph_list = native_search(
        QuerySet(Paragraph).filter(document_id__in=document_ids),
        get_file_content(
            os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_paragraph_document_name.sql')
        )
    )
    problem_mapping_list = native_search(
        QuerySet(ProblemParagraphMapping).filter(document_id__in=document_ids),
        get_file_content(os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_problem_mapping.sql')),
        with_table_name=True
    )
    data_dict, document_dict = DocumentSerializers.Operate.merge_problem(
        paragraph_list, problem_mapping_list, document_list
    )
    return paragraph_list, data_dict, document_dict
1396+
1397+
13351398
class BatchGenerateRelated(serializers.Serializer):
13361399
workspace_id = serializers.CharField(required=True, label=_('workspace id'))
13371400
knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))

apps/knowledge/urls.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_delete', views.DocumentView.BatchDelete.as_view()),
3838
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_refresh', views.DocumentView.BatchRefresh.as_view()),
3939
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_generate_related', views.DocumentView.BatchGenerateRelated.as_view()),
40+
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_export', views.DocumentView.BatchExport.as_view()),
41+
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_export_zip', views.DocumentView.BatchExportZip.as_view()),
4042
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/web', views.WebDocumentView.as_view()),
4143
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/qa', views.QaDocumentView.as_view()),
4244
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/table', views.TableDocumentView.as_view()),

apps/knowledge/views/document.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,54 @@ def put(self, request: Request, workspace_id: str, knowledge_id: str):
571571
data={'workspace_id': workspace_id, 'knowledge_id': knowledge_id}
572572
).batch_generate_related(request.data))
573573

574+
class BatchExport(APIView):
    # POST endpoint that hands a list of document ids to
    # DocumentSerializers.Batch.batch_export and returns its response.
    authentication_classes = [TokenAuth]

    @has_permissions(
        PermissionConstants.KNOWLEDGE_DOCUMENT_EXPORT.get_workspace_knowledge_permission(),
        PermissionConstants.KNOWLEDGE_DOCUMENT_EXPORT.get_workspace_permission_workspace_manage_role(),
        RoleConstants.WORKSPACE_MANAGE.get_workspace_role(),
        ViewPermission([RoleConstants.USER.get_workspace_role()],
                       [PermissionConstants.KNOWLEDGE.get_workspace_knowledge_permission()], CompareConstants.AND),
    )
    @log(
        menu='document', operate="Export multiple document",
        # NOTE(review): this handler receives only workspace_id and knowledge_id,
        # so keywords.get('document_id') looks like it is always None here -
        # confirm whether a batch-aware operation object was intended.
        get_operation_object=lambda r, keywords: get_knowledge_document_operation_object(
            get_knowledge_operation_object(keywords.get('knowledge_id')),
            get_document_operation_object(keywords.get('document_id'))
        ),
    )
    def post(self, request: Request, workspace_id: str, knowledge_id: str):
        # Context for the serializer: where the export happens and who asked for it.
        serializer_context = {
            'workspace_id': workspace_id,
            'knowledge_id': knowledge_id,
            'user_id': request.user.id
        }
        batch_serializer = DocumentSerializers.Batch(data=serializer_context)
        # The request body itself is used as the list of document ids.
        export_request = {'id_list': request.data}
        return batch_serializer.batch_export(export_request)
597+
598+
class BatchExportZip(APIView):
    # POST endpoint that hands a list of document ids to
    # DocumentSerializers.Batch.batch_export_zip and returns its response.
    authentication_classes = [TokenAuth]

    @has_permissions(
        PermissionConstants.KNOWLEDGE_DOCUMENT_EXPORT.get_workspace_knowledge_permission(),
        PermissionConstants.KNOWLEDGE_DOCUMENT_EXPORT.get_workspace_permission_workspace_manage_role(),
        RoleConstants.WORKSPACE_MANAGE.get_workspace_role(),
        ViewPermission([RoleConstants.USER.get_workspace_role()],
                       [PermissionConstants.KNOWLEDGE.get_workspace_knowledge_permission()], CompareConstants.AND),
    )
    @log(
        menu='document', operate="Export multiple document",
        # NOTE(review): this handler receives only workspace_id and knowledge_id,
        # so keywords.get('document_id') looks like it is always None here -
        # confirm whether a batch-aware operation object was intended.
        get_operation_object=lambda r, keywords: get_knowledge_document_operation_object(
            get_knowledge_operation_object(keywords.get('knowledge_id')),
            get_document_operation_object(keywords.get('document_id'))
        ),
    )
    def post(self, request: Request, workspace_id: str, knowledge_id: str):
        # Context for the serializer: where the export happens and who asked for it.
        serializer_context = {
            'workspace_id': workspace_id,
            'knowledge_id': knowledge_id,
            'user_id': request.user.id
        }
        batch_serializer = DocumentSerializers.Batch(data=serializer_context)
        # The request body itself is used as the list of document ids.
        export_request = {'id_list': request.data}
        return batch_serializer.batch_export_zip(export_request)
621+
574622
class Page(APIView):
575623
authentication_classes = [TokenAuth]
576624

ui/src/api/knowledge/document.ts

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
import { Result } from '@/request/Result'
2-
import { del, exportExcel, exportFile, get, post, put } from '@/request/index'
2+
import {
3+
del,
4+
exportExcel,
5+
exportExcelPost,
6+
exportFile,
7+
exportFilePost,
8+
get,
9+
post,
10+
put
11+
} from '@/request/index'
312
import type { Ref } from 'vue'
413
import type { KeyValue, pageRequest } from '@/api/type/common'
514

@@ -173,6 +182,21 @@ const exportDocument: (
173182
loading,
174183
)
175184
}
185+
186+
/**
 * Export several documents of one knowledge base through the batch_export endpoint.
 * @param document_name base name of the download; trimmed, then '.xlsx' is appended
 * @param knowledge_id knowledge base id, interpolated into the request URL
 * @param document_ids ids of the documents to export, forwarded to exportExcelPost
 * @param loading optional loading flag forwarded to exportExcelPost
 */
const exportMulDocument: (
  document_name: string,
  knowledge_id: string,
  document_ids: string[],
  loading?: Ref<boolean>,
) => Promise<any> = (document_name, knowledge_id, document_ids, loading) => {
  const fileName = `${document_name.trim()}.xlsx`
  const url = `${prefix.value}/${knowledge_id}/document/batch_export`
  return exportExcelPost(fileName, url, {}, document_ids, loading)
}
176200
/**
177201
* 导出文档
178202
* @param document_name 文档名称
@@ -195,6 +219,21 @@ const exportDocumentZip: (
195219
)
196220
}
197221

222+
/**
 * Export several documents of one knowledge base through the batch_export_zip endpoint.
 * @param document_name base name of the download; trimmed, then '.zip' is appended
 * @param knowledge_id knowledge base id, interpolated into the request URL
 * @param document_ids ids of the documents to export, forwarded to exportFilePost
 * @param loading optional loading flag forwarded to exportFilePost
 */
const exportMulDocumentZip: (
  document_name: string,
  knowledge_id: string,
  document_ids: string[],
  loading?: Ref<boolean>,
) => Promise<any> = (document_name, knowledge_id, document_ids, loading) => {
  const fileName = `${document_name.trim()}.zip`
  const url = `${prefix.value}/${knowledge_id}/document/batch_export_zip`
  return exportFilePost(fileName, url, {}, document_ids, loading)
}
236+
198237
/**
199238
* 刷新文档向量库
200239
* @param 参数
@@ -619,6 +658,8 @@ export default {
619658
postReplaceSourceFile,
620659
exportDocument,
621660
exportDocumentZip,
661+
exportMulDocument,
662+
exportMulDocumentZip,
622663
putDocumentRefresh,
623664
putDocumentSync,
624665
putMulDocument,

ui/src/api/system-resource-management/document.ts

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
import { Result } from '@/request/Result'
2-
import { get, post, del, put, exportExcel, exportFile } from '@/request/index'
2+
import {
3+
get,
4+
post,
5+
del,
6+
put,
7+
exportExcel,
8+
exportFile,
9+
exportFilePost,
10+
exportExcelPost
11+
} from '@/request/index'
312
import type { Ref } from 'vue'
413
import type { KeyValue } from '@/api/type/common'
514
import type { pageRequest } from '@/api/type/common'
@@ -165,6 +174,21 @@ const exportDocument: (
165174
loading,
166175
)
167176
}
177+
178+
/**
 * Export several documents of one knowledge base through the batch_export endpoint.
 * @param document_name base name of the download; trimmed, then '.xlsx' is appended
 * @param knowledge_id knowledge base id, interpolated into the request URL
 * @param document_ids ids of the documents to export, forwarded to exportExcelPost
 * @param loading optional loading flag forwarded to exportExcelPost
 */
const exportMulDocument: (
  document_name: string,
  knowledge_id: string,
  document_ids: string[],
  loading?: Ref<boolean>,
) => Promise<any> = (document_name, knowledge_id, document_ids, loading) => {
  const fileName = `${document_name.trim()}.xlsx`
  const url = `${prefix}/${knowledge_id}/document/batch_export`
  return exportExcelPost(fileName, url, {}, document_ids, loading)
}
168192
/**
169193
* 导出文档
170194
* @param document_name 文档名称
@@ -187,6 +211,20 @@ const exportDocumentZip: (
187211
)
188212
}
189213

214+
/**
 * Export several documents of one knowledge base through the batch_export_zip endpoint.
 * @param document_name base name of the download; trimmed, then '.zip' is appended
 * @param knowledge_id knowledge base id, interpolated into the request URL
 * @param document_ids ids of the documents to export, forwarded to exportFilePost
 * @param loading optional loading flag forwarded to exportFilePost
 */
const exportMulDocumentZip: (
  document_name: string,
  knowledge_id: string,
  document_ids: string[],
  loading?: Ref<boolean>,
) => Promise<any> = (document_name, knowledge_id, document_ids, loading) => {
  const fileName = `${document_name.trim()}.zip`
  const url = `${prefix}/${knowledge_id}/document/batch_export_zip`
  return exportFilePost(fileName, url, {}, document_ids, loading)
}
190228
/**
191229
* 刷新文档向量库
192230
* @param 参数
@@ -580,6 +618,8 @@ export default {
580618
postReplaceSourceFile,
581619
exportDocument,
582620
exportDocumentZip,
621+
exportMulDocument,
622+
exportMulDocumentZip,
583623
putDocumentRefresh,
584624
putDocumentSync,
585625
putMulDocument,

ui/src/api/system-shared/document.ts

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
import { Result } from '@/request/Result'
2-
import { get, post, del, put, exportExcel, exportFile } from '@/request/index'
2+
import {
3+
get,
4+
post,
5+
del,
6+
put,
7+
exportExcel,
8+
exportFile,
9+
exportExcelPost,
10+
exportFilePost
11+
} from '@/request/index'
312
import type { Ref } from 'vue'
413
import type { KeyValue } from '@/api/type/common'
514
import type { pageRequest } from '@/api/type/common'
@@ -165,6 +174,21 @@ const exportDocument: (
165174
loading,
166175
)
167176
}
177+
178+
/**
 * Export several documents of one knowledge base through the batch_export endpoint.
 * @param document_name base name of the download; trimmed, then '.xlsx' is appended
 * @param knowledge_id knowledge base id, interpolated into the request URL
 * @param document_ids ids of the documents to export, forwarded to exportExcelPost
 * @param loading optional loading flag forwarded to exportExcelPost
 */
const exportMulDocument: (
  document_name: string,
  knowledge_id: string,
  document_ids: string[],
  loading?: Ref<boolean>,
) => Promise<any> = (document_name, knowledge_id, document_ids, loading) => {
  const fileName = `${document_name.trim()}.xlsx`
  const url = `${prefix}/${knowledge_id}/document/batch_export`
  return exportExcelPost(fileName, url, {}, document_ids, loading)
}
168192
/**
169193
* 导出文档
170194
* @param document_name 文档名称
@@ -187,6 +211,20 @@ const exportDocumentZip: (
187211
)
188212
}
189213

214+
/**
 * Export several documents of one knowledge base through the batch_export_zip endpoint.
 * @param document_name base name of the download; trimmed, then '.zip' is appended
 * @param knowledge_id knowledge base id, interpolated into the request URL
 * @param document_ids ids of the documents to export, forwarded to exportFilePost
 * @param loading optional loading flag forwarded to exportFilePost
 */
const exportMulDocumentZip: (
  document_name: string,
  knowledge_id: string,
  document_ids: string[],
  loading?: Ref<boolean>,
) => Promise<any> = (document_name, knowledge_id, document_ids, loading) => {
  const fileName = `${document_name.trim()}.zip`
  const url = `${prefix}/${knowledge_id}/document/batch_export_zip`
  return exportFilePost(fileName, url, {}, document_ids, loading)
}
190228
/**
191229
* 刷新文档向量库
192230
* @param 参数
@@ -581,6 +619,8 @@ export default {
581619
postReplaceSourceFile,
582620
exportDocument,
583621
exportDocumentZip,
622+
exportMulDocument,
623+
exportMulDocumentZip,
584624
putDocumentRefresh,
585625
putDocumentSync,
586626
putMulDocument,

0 commit comments

Comments
 (0)