Compare commits

...

2 Commits

Author SHA1 Message Date
8abea44aa7 feat(frontend): add rag document parse controls 2026-05-21 23:20:51 +08:00
1de773405f feat(rag): add document parsing structures 2026-05-21 23:20:09 +08:00
52 changed files with 2394 additions and 620 deletions

View File

@@ -47,6 +47,31 @@ export interface RagDocumentBatchUploadRequest {
remark?: string;
}
/**
 * Chunking strategy identifiers accepted by the document parse request.
 *
 * Meanings below follow the descriptions shown in the parse dialog UI;
 * the authoritative behavior lives in the backend (not visible here).
 * - FIXED_LENGTH: split by a configured length with an overlap
 * - PARAGRAPH: split on blank lines / paragraph boundaries
 * - HEADING: organize content by heading and section hierarchy
 * - TABLE_ROW: split by table rows (spreadsheets, structured detail data)
 * - DELIMITER: split on a period, newline or custom delimiter
 * - SEMANTIC: split on semantic boundaries (described in the UI as a later capability)
 */
export type RagChunkStrategy =
| 'FIXED_LENGTH'
| 'PARAGRAPH'
| 'HEADING'
| 'TABLE_ROW'
| 'DELIMITER'
| 'SEMANTIC';
/**
 * Request body for `/rag/documents/parse`.
 *
 * `chunkSize`, `chunkOverlap` and `delimiter` are optional tuning values.
 * NOTE(review): the page only exposes size/overlap for FIXED_LENGTH and the
 * delimiter for DELIMITER — confirm how the backend treats them for other strategies.
 */
export interface RagDocumentParseRequest {
// Ids of the documents to parse in this request.
documentIds: string[];
// Strategy used to split the parsed text into chunks.
chunkStrategy: RagChunkStrategy;
chunkSize?: number;
chunkOverlap?: number;
delimiter?: string;
}
/**
 * One per-document entry of the `/rag/documents/parse` response.
 *
 * The numeric fields may be absent or `null`.
 * NOTE(review): presumably they are only populated for matching document
 * types (pages for paginated files, sheets for spreadsheets) — confirm with the backend.
 */
export interface RagDocumentParseResponse {
documentId: string;
// Parse state as a plain string; the documents page knows UPLOADED / PARSING / PARSED / FAILED.
parseStatus: string;
textLength?: number | null;
pageCount?: number | null;
sheetCount?: number | null;
// Free-form extra data; values are untyped and must be narrowed before use.
metadata?: Record<string, unknown>;
}
/**
 * Requests the document list by POSTing to `/rag/documents/list` without a request body.
 *
 * @returns Whatever `post<RagDocument[]>` returns (the helper is defined outside this view).
 */
export function listRagDocuments() {
return post<RagDocument[]>('/rag/documents/list');
}
@@ -86,3 +111,7 @@ export function batchUploadRagDocuments(data: RagDocumentBatchUploadRequest) {
}
return post<RagDocument[], FormData>('/rag/documents/batchUpload', formData);
}
/**
 * Submits a parse request for one or more documents to `/rag/documents/parse`.
 *
 * @param data Document ids plus the chunking configuration.
 * @returns Whatever `post` returns for a `RagDocumentParseResponse[]` payload
 *          (the helper is defined outside this view).
 */
export function parseRagDocuments(data: RagDocumentParseRequest) {
return post<RagDocumentParseResponse[], RagDocumentParseRequest>('/rag/documents/parse', data);
}

View File

@@ -0,0 +1,184 @@
<script setup lang="ts">
import { UploadFilled } from '@element-plus/icons-vue';
import { ElMessage, type UploadFile, type UploadUserFile } from 'element-plus';
import { computed, ref, watch } from 'vue';
import {
batchUploadRagDocuments,
SOURCE_TYPE_RAG,
} from '@/api/ragDocuments';
import type { RagStore } from '@/api/ragStores';
// Component inputs:
// - modelValue: dialog visibility, bound through v-model
// - stores: knowledge bases offered in the store selector
// - lockedStoreId: when truthy, the selector is replaced by a disabled input for this store
const props = defineProps<{
modelValue: boolean;
stores: RagStore[];
lockedStoreId?: string | null;
}>();
// Emitted events: `update:modelValue` for v-model, and `uploaded` after a successful batch upload.
const emit = defineEmits<{
(event: 'update:modelValue', value: boolean): void;
(event: 'uploaded'): void;
}>();
// True while the upload request is in flight; drives the submit button's loading state.
const submitting = ref(false);
// Form fields of the dialog.
const uploadStoreId = ref('');
const uploadSummary = ref('');
const uploadRemark = ref('');
// Raw File objects that will be sent, kept in sync by syncUploadFiles.
const uploadFiles = ref<File[]>([]);
// File list model bound to <el-upload> via v-model:file-list.
const uploadFileList = ref<UploadUserFile[]>([]);
// Writable proxy around the v-model prop so the template can use v-model="visible".
const visible = computed({
get: () => props.modelValue,
set: (value: boolean) => emit('update:modelValue', value),
});
// Whether the target store is fixed by the parent.
const storeLocked = computed(() => Boolean(props.lockedStoreId));
// Display name of the locked store, or '-' when no store in `stores` matches lockedStoreId.
const lockedStoreName = computed(() => {
const store = props.stores.find((item) => String(item.id) === props.lockedStoreId);
return store?.storeName ?? '-';
});
// Re-initialise the whole form each time the dialog becomes visible.
watch(
  () => props.modelValue,
  (isOpen) => {
    if (isOpen) {
      // Prefer the locked store; otherwise fall back to the first store (or empty).
      const defaultStore = props.stores[0];
      const fallbackStoreId = defaultStore ? String(defaultStore.id) : '';
      uploadStoreId.value = props.lockedStoreId || fallbackStoreId;
      uploadSummary.value = '';
      uploadRemark.value = '';
      uploadFiles.value = [];
      uploadFileList.value = [];
    }
  },
);
/**
 * Mirrors the upload component's file list into `uploadFiles`,
 * keeping only the entries that carry a raw file.
 *
 * @param fileList Current file list reported by the upload component.
 */
function syncUploadFiles(fileList: UploadFile[]) {
  uploadFiles.value = fileList
    .filter((entry) => Boolean(entry.raw))
    .map((entry) => entry.raw as File);
}
// <el-upload> on-change hook: only the resulting file list is used, the changed file is ignored.
function handleUploadChange(_file: UploadFile, fileList: UploadFile[]) {
syncUploadFiles(fileList);
}
// <el-upload> on-remove hook: re-syncs from the remaining file list, the removed file is ignored.
function handleUploadRemove(_file: UploadFile, fileList: UploadFile[]) {
syncUploadFiles(fileList);
}
/**
 * Validates the form and uploads the selected files in one batch.
 *
 * Warns and aborts when no store or no file is selected. On success the
 * dialog is closed, a success message is shown and `uploaded` is emitted.
 * A failed request propagates to the caller; the loading flag is always reset.
 */
async function submitUpload() {
  const storeId = uploadStoreId.value;
  if (!storeId) {
    ElMessage.warning('请选择知识库');
    return;
  }

  const files = uploadFiles.value;
  if (files.length === 0) {
    ElMessage.warning('请选择要上传的文件');
    return;
  }

  submitting.value = true;
  try {
    const request = {
      storeId,
      sourceType: SOURCE_TYPE_RAG,
      files,
      documentSummary: uploadSummary.value || undefined,
      remark: uploadRemark.value || undefined,
    };
    await batchUploadRagDocuments(request);
    visible.value = false;
    ElMessage.success('文档已上传');
    emit('uploaded');
  } finally {
    submitting.value = false;
  }
}
</script>
<template>
<el-dialog v-model="visible" title="批量上传文档" width="560px">
<el-form label-width="96px">
<el-form-item label="知识库" required>
<el-input
v-if="storeLocked"
:model-value="lockedStoreName"
data-test="batch-upload-locked-store"
disabled
/>
<el-select
v-else
v-model="uploadStoreId"
data-test="batch-upload-store-select"
placeholder="请选择知识库"
style="width: 100%"
>
<el-option
v-for="store in stores"
:key="String(store.id)"
:label="store.storeName"
:value="String(store.id)"
/>
</el-select>
</el-form-item>
<el-form-item label="选择文件" required>
<el-upload
v-model:file-list="uploadFileList"
class="batch-upload-dropzone"
drag
multiple
:auto-upload="false"
accept=".pdf,.doc,.docx,.txt,.md"
:on-change="handleUploadChange"
:on-remove="handleUploadRemove"
>
<el-icon class="el-icon--upload"><UploadFilled /></el-icon>
<div class="el-upload__text">
拖拽文件到此处 <em>点击选择</em>
</div>
<template #tip>
<div class="el-upload__tip">支持 PDF、Word、TXT、Markdown 等格式</div>
</template>
</el-upload>
</el-form-item>
<el-form-item label="文档摘要">
<el-input
v-model="uploadSummary"
type="textarea"
:rows="2"
placeholder="可选,将统一设置到所有上传文档"
/>
</el-form-item>
<el-form-item label="备注">
<el-input
v-model="uploadRemark"
type="textarea"
:rows="2"
placeholder="可选"
/>
</el-form-item>
</el-form>
<template #footer>
<el-button @click="visible = false">取消</el-button>
<el-button type="primary" :loading="submitting" @click="submitUpload">上传</el-button>
</template>
</el-dialog>
</template>
<style scoped>
.batch-upload-dropzone {
width: 100%;
}
.batch-upload-dropzone :deep(.el-upload) {
width: 100%;
}
.batch-upload-dropzone :deep(.el-upload-dragger) {
width: 100%;
padding: 28px 16px;
}
</style>

View File

@@ -1,478 +0,0 @@
<script setup lang="ts">
import { Search, UploadFilled } from '@element-plus/icons-vue';
import { ElMessage, ElMessageBox } from 'element-plus';
import { computed, onMounted, reactive, ref } from 'vue';
import {
batchUploadRagDocuments,
deleteRagDocument,
getRagDocumentById,
listRagDocuments,
saveRagDocument,
SOURCE_TYPE_RAG,
type RagDocument,
} from '@/api/ragDocuments';
import { queryRagStores, type RagStore } from '@/api/ragStores';
// Table loading flag and the shared "request in flight" flag of the dialogs.
const loading = ref(false);
const submitting = ref(false);
// Knowledge bases for the selectors, and the document rows as loaded from the API.
const storeOptions = ref<RagStore[]>([]);
const docRows = ref<RagDocument[]>([]);
// Filter values; an empty string means "no filter". `enabled` holds 'true' / 'false' as strings.
const queryForm = reactive({
storeId: '',
parseStatus: '',
indexStatus: '',
enabled: '' as string,
keyword: '',
});
// Dialog visibility flags and the upload dialog's form fields.
const editDialogVisible = ref(false);
const uploadDialogVisible = ref(false);
const uploadStoreId = ref('');
const uploadSummary = ref('');
const uploadRemark = ref('');
// Working copy of the document currently being edited.
const editForm = reactive({
id: '',
storeId: '',
attachmentId: '',
documentTitle: '',
documentSummary: '',
enabled: true,
remark: '',
});
/**
 * Rows of `docRows` that satisfy every active filter in `queryForm`.
 * Empty filter values match everything; the keyword is matched
 * case-insensitively against title, summary and remark.
 */
const filteredRows = computed(() => {
  const needle = queryForm.keyword.trim().toLowerCase();
  const { storeId, parseStatus, indexStatus, enabled } = queryForm;
  const containsNeedle = (text?: string | null) =>
    Boolean(text && text.toLowerCase().includes(needle));

  return docRows.value.filter((row) => {
    if (storeId && row.storeId !== storeId) return false;
    if (parseStatus && row.parseStatus !== parseStatus) return false;
    if (indexStatus && row.indexStatus !== indexStatus) return false;
    // The filter stores 'true' / 'false'; a missing flag counts as false.
    if (enabled && String(row.enabled ?? false) !== enabled) return false;
    if (!needle) return true;
    return [row.documentTitle, row.documentSummary, row.remark].some((text) => containsNeedle(text));
  });
});
/**
 * Loads the knowledge bases for the selectors.
 * Best-effort: any failure leaves the option list empty instead of throwing.
 */
async function loadStores() {
  let stores: RagStore[] = [];
  try {
    const { data } = await queryRagStores();
    stores = data ?? [];
  } catch {
    stores = [];
  }
  storeOptions.value = stores;
}
/**
 * Reloads all documents through the list API while showing the table spinner.
 * Request failures propagate; the spinner is always cleared.
 */
async function loadDocs() {
  loading.value = true;
  try {
    const { data } = await listRagDocuments();
    docRows.value = data ?? [];
  } finally {
    loading.value = false;
  }
}
// Search action: reloads the rows; the filters themselves are applied locally by filteredRows.
function handleSearch() {
loadDocs();
}
/** Clears every filter back to "no filter" and reloads the rows. */
function handleReset() {
  Object.assign(queryForm, {
    storeId: '',
    parseStatus: '',
    indexStatus: '',
    enabled: '',
    keyword: '',
  });
  loadDocs();
}
/**
 * Opens the upload dialog with a fresh form.
 * Warns instead when no knowledge base exists. The store is preselected
 * from the active store filter, falling back to the first store.
 */
function openUploadDialog() {
  const stores = storeOptions.value;
  if (stores.length === 0) {
    ElMessage.warning('请先创建知识库');
    return;
  }

  const defaultStore = stores[0];
  const fallbackStoreId = defaultStore ? String(defaultStore.id) : '';
  uploadStoreId.value = queryForm.storeId || fallbackStoreId;
  uploadSummary.value = '';
  uploadRemark.value = '';
  uploadDialogVisible.value = true;
}
/**
 * Validates the upload form and sends the files chosen in the native
 * `#rag-file-input` element as one batch.
 *
 * Warns and aborts when no store or no file is selected. On success the
 * dialog closes, a success message is shown and the rows are reloaded.
 */
async function submitUpload() {
  const storeId = uploadStoreId.value;
  if (!storeId) {
    ElMessage.warning('请选择知识库');
    return;
  }

  // The file input is a plain DOM element, so it is read directly.
  const fileInput = document.getElementById('rag-file-input') as HTMLInputElement;
  const selected = fileInput?.files;
  const hasFiles = Boolean(selected && selected.length > 0);
  if (!selected || !hasFiles) {
    ElMessage.warning('请选择要上传的文件');
    return;
  }

  submitting.value = true;
  try {
    const request = {
      storeId,
      sourceType: SOURCE_TYPE_RAG,
      files: Array.from(selected),
      documentSummary: uploadSummary.value || undefined,
      remark: uploadRemark.value || undefined,
    };
    await batchUploadRagDocuments(request);
    uploadDialogVisible.value = false;
    ElMessage.success('文档已上传');
    await loadDocs();
  } finally {
    submitting.value = false;
  }
}
/**
 * Fills the edit form and opens the edit dialog.
 * Rows with an id are re-fetched from the backend first; rows without
 * an id are edited from the table data as-is.
 *
 * @param row Table row that was clicked.
 */
async function openEditDialog(row: RagDocument) {
  let detail = row;
  if (row.id) {
    const response = await getRagDocumentById(String(row.id));
    detail = response.data;
  }

  editForm.id = String(detail.id ?? '');
  editForm.storeId = detail.storeId;
  editForm.attachmentId = detail.attachmentId ?? '';
  editForm.documentTitle = detail.documentTitle ?? '';
  editForm.documentSummary = detail.documentSummary ?? '';
  editForm.enabled = detail.enabled ?? true;
  editForm.remark = detail.remark ?? '';
  editDialogVisible.value = true;
}
/**
 * Saves the edit form.
 * Requires id, store id and title; empty optional fields are sent as
 * `undefined`. On success the dialog closes and the rows are reloaded.
 */
async function submitEdit() {
  const { id, storeId, documentTitle } = editForm;
  const missingRequired = [id, storeId, documentTitle].some((value) => !value);
  if (missingRequired) {
    ElMessage.warning('请填写文档标题');
    return;
  }

  submitting.value = true;
  try {
    await saveRagDocument({
      id,
      storeId,
      attachmentId: editForm.attachmentId || undefined,
      documentTitle,
      documentSummary: editForm.documentSummary || undefined,
      enabled: editForm.enabled,
      remark: editForm.remark || undefined,
    });
    editDialogVisible.value = false;
    ElMessage.success('文档信息已更新');
    await loadDocs();
  } finally {
    submitting.value = false;
  }
}
/**
 * Deletes a document after the user confirms.
 *
 * Rows without an id are ignored. Dismissing the confirmation is treated
 * as "do nothing": Element Plus rejects the `confirm` promise on
 * cancel/close, and letting that escape would surface as an unhandled
 * promise rejection from the click handler.
 *
 * @param row Table row to delete.
 */
async function removeDoc(row: RagDocument) {
  if (!row.id) return;
  try {
    await ElMessageBox.confirm(
      `确认删除文档「${row.documentTitle || '未命名'}」?`,
      '删除确认',
      { type: 'warning', confirmButtonText: '删除', cancelButtonText: '取消' },
    );
  } catch {
    // User cancelled or closed the dialog.
    return;
  }
  await deleteRagDocument(String(row.id));
  ElMessage.success('文档已删除');
  await loadDocs();
}
/**
 * Flips a row's enabled flag on the backend.
 * Only id, store id, title and the new flag are sent. The row is updated
 * locally and a message is shown once the save succeeds.
 *
 * @param row Table row whose switch was toggled.
 */
function toggleEnabled(row: RagDocument) {
  if (!row.id) return;

  const nextEnabled = !row.enabled;
  const payload = {
    id: String(row.id),
    storeId: row.storeId,
    documentTitle: row.documentTitle ?? '',
    enabled: nextEnabled,
  };
  const applyToggle = () => {
    row.enabled = nextEnabled;
    ElMessage.success(`${nextEnabled ? '启用' : '停用'}`);
  };
  saveRagDocument(payload).then(applyToggle);
}
/**
 * Resolves a store id to its display name.
 *
 * @param storeId Store id as a string.
 * @returns The matching store's name, or '-' when no loaded store matches.
 */
function getStoreName(storeId: string) {
  for (const candidate of storeOptions.value) {
    if (String(candidate.id) === storeId) {
      return candidate.storeName;
    }
  }
  return '-';
}
/**
 * Maps a parse/index status code to its display label.
 * Note that `FAILED` is shared by both status kinds and maps to the parse wording.
 *
 * @param status Status code; may be missing.
 * @returns The label, the raw code when it is unknown, or '-' when empty.
 */
function getStatusLabel(status?: string | null) {
  if (!status) {
    return '-';
  }
  const labels: Record<string, string> = {
    UPLOADED: '已上传',
    PARSING: '解析中',
    PARSED: '已解析',
    FAILED: '解析失败',
    PENDING: '待索引',
    INDEXING: '索引中',
    INDEXED: '已索引',
  };
  return labels[status] ?? status;
}
/**
 * Picks the tag color type for a parse status.
 *
 * @param status Parse status code; may be missing.
 * @returns 'success' for PARSED, 'warning' for UPLOADED/PARSING,
 *          'danger' for FAILED, otherwise 'info'.
 */
function getParseStatusType(status?: string | null) {
  switch (status) {
    case 'PARSED':
      return 'success';
    case 'UPLOADED':
    case 'PARSING':
      return 'warning';
    case 'FAILED':
      return 'danger';
    default:
      return 'info';
  }
}
/**
 * Picks the tag color type for an index status.
 *
 * @param status Index status code; may be missing.
 * @returns 'success' for INDEXED, 'warning' for PENDING/INDEXING,
 *          'danger' for FAILED, otherwise 'info'.
 */
function getIndexStatusType(status?: string | null) {
  switch (status) {
    case 'INDEXED':
      return 'success';
    case 'PENDING':
    case 'INDEXING':
      return 'warning';
    case 'FAILED':
      return 'danger';
    default:
      return 'info';
  }
}
// Initial load: both requests are started without awaiting each other.
onMounted(() => {
loadStores();
loadDocs();
});
</script>
<template>
<section class="page-panel rag-doc-page">
<div class="page-panel__header">
<h2>知识文档</h2>
<span>Documents</span>
</div>
<div class="toolbar">
<div class="toolbar__filters">
<el-select
v-model="queryForm.storeId"
data-test="doc-store-filter"
placeholder="选择知识库"
clearable
style="width: 180px"
>
<el-option
v-for="store in storeOptions"
:key="String(store.id)"
:label="store.storeName"
:value="String(store.id)"
/>
</el-select>
<el-select
v-model="queryForm.parseStatus"
data-test="doc-parse-filter"
placeholder="解析状态"
clearable
style="width: 130px"
>
<el-option label="已上传" value="UPLOADED" />
<el-option label="解析中" value="PARSING" />
<el-option label="已解析" value="PARSED" />
<el-option label="解析失败" value="FAILED" />
</el-select>
<el-select
v-model="queryForm.indexStatus"
data-test="doc-index-filter"
placeholder="索引状态"
clearable
style="width: 130px"
>
<el-option label="待索引" value="PENDING" />
<el-option label="索引中" value="INDEXING" />
<el-option label="已索引" value="INDEXED" />
<el-option label="索引失败" value="FAILED" />
</el-select>
<el-select
v-model="queryForm.enabled"
data-test="doc-enabled-filter"
placeholder="启用状态"
clearable
style="width: 120px"
>
<el-option label="启用" value="true" />
<el-option label="停用" value="false" />
</el-select>
<el-input
v-model="queryForm.keyword"
data-test="doc-keyword-input"
placeholder="搜索标题/摘要/备注"
clearable
style="width: 180px"
@keyup.enter="handleSearch"
/>
</div>
<div class="toolbar__actions">
<el-button type="primary" :icon="UploadFilled" @click="openUploadDialog">批量上传</el-button>
<el-button data-test="doc-search" type="primary" :icon="Search" @click="handleSearch">查询</el-button>
<el-button @click="handleReset">重置</el-button>
</div>
</div>
<el-table v-loading="loading" :data="filteredRows" border stripe style="width: 100%">
<el-table-column prop="documentTitle" label="文档标题" min-width="180" show-overflow-tooltip />
<el-table-column label="所属知识库" width="150">
<template #default="{ row }">
{{ getStoreName(row.storeId) }}
</template>
</el-table-column>
<el-table-column label="解析状态" width="110">
<template #default="{ row }">
<el-tag :type="getParseStatusType(row.parseStatus)" size="small">
{{ getStatusLabel(row.parseStatus) }}
</el-tag>
</template>
</el-table-column>
<el-table-column label="索引状态" width="110">
  <template #default="{ row }">
    <!-- FAILED is shared by parse and index statuses and getStatusLabel maps it to the parse wording, so the index column overrides it. -->
    <el-tag :type="getIndexStatusType(row.indexStatus)" size="small">
      {{ row.indexStatus === 'FAILED' ? '索引失败' : getStatusLabel(row.indexStatus) }}
    </el-tag>
  </template>
</el-table-column>
<el-table-column label="启用" width="80" align="center">
<template #default="{ row }">
<el-switch
:model-value="row.enabled ?? false"
size="small"
@change="toggleEnabled(row)"
/>
</template>
</el-table-column>
<el-table-column prop="documentSummary" label="摘要" min-width="160" show-overflow-tooltip />
<el-table-column prop="createTime" label="创建时间" width="170" />
<el-table-column label="操作" width="160" fixed="right">
<template #default="{ row }">
<el-button :data-test="`doc-edit-${row.id}`" link type="primary" @click="openEditDialog(row)">编辑</el-button>
<el-button link type="danger" @click="removeDoc(row)">删除</el-button>
</template>
</el-table-column>
</el-table>
<el-empty v-if="!loading && filteredRows.length === 0" description="暂无知识文档" />
<!-- 上传对话框 -->
<el-dialog v-model="uploadDialogVisible" title="批量上传文档" width="560px">
<el-form label-width="96px">
<el-form-item label="知识库" required>
<el-select v-model="uploadStoreId" placeholder="请选择知识库" style="width: 100%">
<el-option
v-for="store in storeOptions"
:key="String(store.id)"
:label="store.storeName"
:value="String(store.id)"
/>
</el-select>
</el-form-item>
<el-form-item label="选择文件" required>
<input id="rag-file-input" type="file" multiple accept=".pdf,.doc,.docx,.txt,.md" />
<p class="form-hint">支持 PDFWordTXTMarkdown 等格式</p>
</el-form-item>
<el-form-item label="文档摘要">
<el-input
v-model="uploadSummary"
type="textarea"
:rows="2"
placeholder="可选,将统一设置到所有上传文档"
/>
</el-form-item>
<el-form-item label="备注">
<el-input
v-model="uploadRemark"
type="textarea"
:rows="2"
placeholder="可选"
/>
</el-form-item>
</el-form>
<template #footer>
<el-button @click="uploadDialogVisible = false">取消</el-button>
<el-button type="primary" :loading="submitting" @click="submitUpload">上传</el-button>
</template>
</el-dialog>
<!-- 编辑对话框 -->
<el-dialog v-model="editDialogVisible" title="编辑文档" width="560px">
<el-form :model="editForm" label-width="96px">
<el-form-item label="文档标题" required>
<el-input v-model="editForm.documentTitle" />
</el-form-item>
<el-form-item label="文档摘要">
<el-input v-model="editForm.documentSummary" type="textarea" :rows="3" />
</el-form-item>
<el-form-item label="启用">
<el-switch v-model="editForm.enabled" />
</el-form-item>
<el-form-item label="备注">
<el-input v-model="editForm.remark" type="textarea" :rows="2" />
</el-form-item>
</el-form>
<template #footer>
<el-button @click="editDialogVisible = false">取消</el-button>
<el-button type="primary" :loading="submitting" @click="submitEdit">保存</el-button>
</template>
</el-dialog>
</section>
</template>
<style scoped>
.rag-doc-page {
display: flex;
flex-direction: column;
}
.toolbar {
display: flex;
justify-content: space-between;
align-items: flex-start;
gap: 16px;
padding: 16px 22px;
border-bottom: 1px solid #eef2f7;
flex-wrap: wrap;
}
.toolbar__filters {
display: flex;
gap: 10px;
flex-wrap: wrap;
flex: 1;
}
.toolbar__actions {
display: flex;
gap: 8px;
flex-shrink: 0;
}
.form-hint {
margin: 4px 0 0;
color: #98a2b3;
font-size: 12px;
}
@media (max-width: 768px) {
.toolbar {
flex-direction: column;
align-items: stretch;
}
.toolbar__filters {
flex-direction: column;
}
.toolbar__actions {
justify-content: flex-end;
}
}
</style>

View File

@@ -1,125 +0,0 @@
import { flushPromises, mount } from '@vue/test-utils';
import ElementPlus from 'element-plus';
import { describe, expect, it, vi } from 'vitest';
import RagDocumentsPage from '../RagDocumentsPage.vue';
import { getRagDocumentById, listRagDocuments, queryRagDocuments } from '@/api/ragDocuments';
import { queryRagStores } from '@/api/ragStores';
// Stub the store API: queryRagStores resolves with two stores (ids '1' and '2').
vi.mock('@/api/ragStores', () => ({
queryRagStores: vi.fn(() =>
Promise.resolve({
resultcode: '0',
message: null,
data: [
{ id: '1', storeCode: 'PROD_DOC', storeName: '产品制度库', status: '启用' },
{ id: '2', storeCode: 'FAQ', storeName: 'FAQ知识库', status: '停用' },
],
}),
),
}));
// Stub the document API. listRagDocuments resolves with two documents (ids '11' and '22'),
// queryRagDocuments is a bare spy so the tests can assert it is never called, and the
// remaining functions resolve with fixed success payloads.
vi.mock('@/api/ragDocuments', () => ({
SOURCE_TYPE_RAG: 'RAG',
listRagDocuments: vi.fn(() =>
Promise.resolve({
resultcode: '0',
message: null,
data: [
{
id: '11',
storeId: '1',
attachmentId: '101',
documentTitle: '产品制度总则',
documentSummary: '制度摘要',
parseStatus: 'UPLOADED',
indexStatus: 'PENDING',
enabled: true,
remark: '制度文档',
createTime: '2026-05-21 10:00:00',
},
{
id: '22',
storeId: '2',
attachmentId: '202',
documentTitle: 'FAQ 手册',
documentSummary: 'FAQ 摘要',
parseStatus: 'PARSED',
indexStatus: 'INDEXED',
enabled: false,
remark: '常见问题',
createTime: '2026-05-21 11:00:00',
},
],
}),
),
queryRagDocuments: vi.fn(),
// Detail lookup echoes the requested id back with fixed FAQ document fields.
getRagDocumentById: vi.fn((id: string) =>
Promise.resolve({
resultcode: '0',
message: null,
data: {
id,
storeId: '2',
attachmentId: '202',
documentTitle: 'FAQ 手册',
documentSummary: 'FAQ 摘要',
enabled: false,
remark: '常见问题',
},
}),
),
saveRagDocument: vi.fn(() => Promise.resolve({ resultcode: '0', message: null, data: true })),
deleteRagDocument: vi.fn(() => Promise.resolve({ resultcode: '0', message: null, data: true })),
batchUploadRagDocuments: vi.fn(() => Promise.resolve({ resultcode: '0', message: null, data: [] })),
}));
// Page-level tests: each case mounts the page with Element Plus installed and
// waits for the mocked API promises to settle before asserting.
describe('RagDocumentsPage', () => {
// Initial load must use the list API and render both mocked documents.
it('loads documents from list api instead of broken query api', async () => {
const wrapper = mount(RagDocumentsPage, {
global: {
plugins: [ElementPlus],
},
});
await flushPromises();
expect(queryRagStores).toHaveBeenCalled();
expect(listRagDocuments).toHaveBeenCalled();
expect(queryRagDocuments).not.toHaveBeenCalled();
expect(wrapper.text()).toContain('产品制度总则');
expect(wrapper.text()).toContain('FAQ 手册');
});
// Typing a keyword and clicking search must narrow the rows without calling the query API.
it('filters rows locally and still avoids query api on search', async () => {
const wrapper = mount(RagDocumentsPage, {
global: {
plugins: [ElementPlus],
},
});
await flushPromises();
await wrapper.get('[data-test="doc-keyword-input"]').setValue('FAQ');
await wrapper.get('[data-test="doc-search"]').trigger('click');
await flushPromises();
expect(listRagDocuments).toHaveBeenCalled();
expect(queryRagDocuments).not.toHaveBeenCalled();
expect(wrapper.text()).toContain('FAQ 手册');
expect(wrapper.text()).not.toContain('产品制度总则');
});
// Clicking edit on row 22 must fetch that document's detail from the backend.
it('loads backend detail when editing a row', async () => {
const wrapper = mount(RagDocumentsPage, {
global: {
plugins: [ElementPlus],
},
});
await flushPromises();
await wrapper.get('[data-test="doc-edit-22"]').trigger('click');
await flushPromises();
expect(getRagDocumentById).toHaveBeenCalledWith('22');
});
});

View File

@@ -0,0 +1,638 @@
<script setup lang="ts">
import { Operation, Search, UploadFilled } from '@element-plus/icons-vue';
import { ElMessage, ElMessageBox } from 'element-plus';
import { computed, onMounted, reactive, ref } from 'vue';
import { useRoute } from 'vue-router';
import {
deleteRagDocument,
getRagDocumentById,
parseRagDocuments,
queryRagDocuments,
saveRagDocument,
type RagChunkStrategy,
type RagDocument,
} from '@/api/ragDocuments';
import { queryRagStores, type RagStore } from '@/api/ragStores';
import RagDocumentBatchUploadDialog from '@/components/rag/RagDocumentBatchUploadDialog.vue';
// Table loading flag, plus separate "request in flight" flags for the edit and parse dialogs.
const loading = ref(false);
const submitting = ref(false);
const parseSubmitting = ref(false);
// Current route; its `storeId` query parameter seeds the store filter on mount.
const route = useRoute();
// Knowledge bases for the selectors, and the document rows as loaded from the API.
const storeOptions = ref<RagStore[]>([]);
const docRows = ref<RagDocument[]>([]);
// Filter values; an empty string means "no filter". `enabled` holds 'true' / 'false' as strings.
const queryForm = reactive({
storeId: '',
parseStatus: '',
indexStatus: '',
enabled: '' as string,
keyword: '',
});
// Dialog visibility flags.
const editDialogVisible = ref(false);
const uploadDialogVisible = ref(false);
const parseDialogVisible = ref(false);
// Rows currently checked in the table; input of the batch parse action.
const selectedDocuments = ref<RagDocument[]>([]);
// Working copy of the document currently being edited.
const editForm = reactive({
id: '',
storeId: '',
attachmentId: '',
documentTitle: '',
documentSummary: '',
enabled: true,
remark: '',
});
// Parse dialog form; openParseDialog resets it to these same defaults on every open.
const parseForm = reactive({
documentIds: [] as string[],
chunkStrategy: 'FIXED_LENGTH' as RagChunkStrategy,
chunkSize: 800,
chunkOverlap: 120,
delimiter: '。',
});
// Radio options of the parse dialog: one entry per RagChunkStrategy value,
// each with a display label and a short user-facing description.
const chunkStrategyOptions: Array<{ label: string; value: RagChunkStrategy; description: string }> = [
{ label: '固定长度切片', value: 'FIXED_LENGTH', description: '按指定长度和重叠长度切分通用文本' },
{ label: '按段落切片', value: 'PARAGRAPH', description: '按空行、自然段落边界切分' },
{ label: '按标题层级切片', value: 'HEADING', description: '按标题和章节层级组织内容' },
{ label: '按表格行切片', value: 'TABLE_ROW', description: '适合 Excel 表格和结构化明细数据' },
{ label: '按分隔符切片', value: 'DELIMITER', description: '按句号、换行符或自定义分隔符切分' },
{ label: '语义切片', value: 'SEMANTIC', description: '后续结合语义边界或模型能力切分' },
];
/**
 * Rows of `docRows` that satisfy every active filter in `queryForm`.
 * Empty filter values match everything; the keyword is matched
 * case-insensitively against title, summary and remark.
 */
const filteredRows = computed(() => {
  const needle = queryForm.keyword.trim().toLowerCase();
  const { storeId, parseStatus, indexStatus, enabled } = queryForm;
  const containsNeedle = (text?: string | null) =>
    Boolean(text && text.toLowerCase().includes(needle));

  return docRows.value.filter((row) => {
    if (storeId && row.storeId !== storeId) return false;
    if (parseStatus && row.parseStatus !== parseStatus) return false;
    if (indexStatus && row.indexStatus !== indexStatus) return false;
    // The filter stores 'true' / 'false'; a missing flag counts as false.
    if (enabled && String(row.enabled ?? false) !== enabled) return false;
    if (!needle) return true;
    return [row.documentTitle, row.documentSummary, row.remark].some((text) => containsNeedle(text));
  });
});
/**
 * Loads the knowledge bases for the selectors.
 * Best-effort: any failure leaves the option list empty instead of throwing.
 */
async function loadStores() {
  let stores: RagStore[] = [];
  try {
    const { data } = await queryRagStores();
    stores = data ?? [];
  } catch {
    stores = [];
  }
  storeOptions.value = stores;
}
/**
 * Reloads the documents through the query API while showing the table spinner.
 * Only non-empty filters are sent; the keyword is not sent and is applied
 * locally by `filteredRows`. Failures propagate; the spinner is always cleared.
 */
async function loadDocs() {
  loading.value = true;
  try {
    const { storeId, parseStatus, indexStatus, enabled } = queryForm;
    const storeFilter = storeId ? { storeId } : {};
    const parseFilter = parseStatus ? { parseStatus } : {};
    const indexFilter = indexStatus ? { indexStatus } : {};
    // The select stores 'true' / 'false' strings; the API takes a boolean.
    const enabledFilter = enabled ? { enabled: enabled === 'true' } : {};

    const { data } = await queryRagDocuments({
      ...storeFilter,
      ...parseFilter,
      ...indexFilter,
      ...enabledFilter,
    });
    docRows.value = data ?? [];
  } finally {
    loading.value = false;
  }
}
// Search action: re-queries the backend with the current filters; the keyword is applied locally by filteredRows.
function handleSearch() {
loadDocs();
}
/** Clears every filter back to "no filter" and reloads the rows. */
function handleReset() {
  Object.assign(queryForm, {
    storeId: '',
    parseStatus: '',
    indexStatus: '',
    enabled: '',
    keyword: '',
  });
  loadDocs();
}
/** Opens the batch upload dialog, or warns when no knowledge base exists yet. */
function openUploadDialog() {
  const hasStores = storeOptions.value.length !== 0;
  if (hasStores) {
    uploadDialogVisible.value = true;
    return;
  }
  ElMessage.warning('请先创建知识库');
}
// Table selection-change hook: remembers the checked rows for the batch parse action.
function handleSelectionChange(rows: RagDocument[]) {
selectedDocuments.value = rows;
}
/**
 * Opens the parse dialog for the given rows with default settings.
 * Rows without an id are dropped; warns instead when none remain.
 *
 * @param rows Documents to parse (one row, or the current selection).
 */
function openParseDialog(rows: RagDocument[]) {
  const documentIds: string[] = [];
  for (const row of rows) {
    const id = String(row.id ?? '');
    if (id) {
      documentIds.push(id);
    }
  }
  if (documentIds.length === 0) {
    ElMessage.warning('请选择需要解析的文档');
    return;
  }

  // Every open starts again from the default chunking configuration.
  parseForm.documentIds = documentIds;
  parseForm.chunkStrategy = 'FIXED_LENGTH';
  parseForm.chunkSize = 800;
  parseForm.chunkOverlap = 120;
  parseForm.delimiter = '。';
  parseDialogVisible.value = true;
}
// Batch action: opens the parse dialog for the rows currently checked in the table.
function openBatchParseDialog() {
openParseDialog(selectedDocuments.value);
}
/**
 * Fills the edit form and opens the edit dialog.
 * Rows with an id are re-fetched from the backend first; rows without
 * an id are edited from the table data as-is.
 *
 * @param row Table row that was clicked.
 */
async function openEditDialog(row: RagDocument) {
  let detail = row;
  if (row.id) {
    const response = await getRagDocumentById(String(row.id));
    detail = response.data;
  }

  editForm.id = String(detail.id ?? '');
  editForm.storeId = detail.storeId;
  editForm.attachmentId = detail.attachmentId ?? '';
  editForm.documentTitle = detail.documentTitle ?? '';
  editForm.documentSummary = detail.documentSummary ?? '';
  editForm.enabled = detail.enabled ?? true;
  editForm.remark = detail.remark ?? '';
  editDialogVisible.value = true;
}
/**
 * Saves the edit form.
 * Requires id, store id and title; empty optional fields are sent as
 * `undefined`. On success the dialog closes and the rows are reloaded.
 */
async function submitEdit() {
  const { id, storeId, documentTitle } = editForm;
  const missingRequired = [id, storeId, documentTitle].some((value) => !value);
  if (missingRequired) {
    ElMessage.warning('请填写文档标题');
    return;
  }

  submitting.value = true;
  try {
    await saveRagDocument({
      id,
      storeId,
      attachmentId: editForm.attachmentId || undefined,
      documentTitle,
      documentSummary: editForm.documentSummary || undefined,
      enabled: editForm.enabled,
      remark: editForm.remark || undefined,
    });
    editDialogVisible.value = false;
    ElMessage.success('文档信息已更新');
    await loadDocs();
  } finally {
    submitting.value = false;
  }
}
/**
 * Deletes a document after the user confirms.
 *
 * Rows without an id are ignored. Dismissing the confirmation is treated
 * as "do nothing": Element Plus rejects the `confirm` promise on
 * cancel/close, and letting that escape would surface as an unhandled
 * promise rejection from the click handler.
 *
 * @param row Table row to delete.
 */
async function removeDoc(row: RagDocument) {
  if (!row.id) return;
  try {
    await ElMessageBox.confirm(
      `确认删除文档「${row.documentTitle || '未命名'}」?`,
      '删除确认',
      { type: 'warning', confirmButtonText: '删除', cancelButtonText: '取消' },
    );
  } catch {
    // User cancelled or closed the dialog.
    return;
  }
  await deleteRagDocument(String(row.id));
  ElMessage.success('文档已删除');
  await loadDocs();
}
/**
 * Flips a row's enabled flag on the backend.
 * Only id, store id, title and the new flag are sent. The row is updated
 * locally and a message is shown once the save succeeds.
 *
 * @param row Table row whose switch was toggled.
 */
function toggleEnabled(row: RagDocument) {
  if (!row.id) return;

  const nextEnabled = !row.enabled;
  const payload = {
    id: String(row.id),
    storeId: row.storeId,
    documentTitle: row.documentTitle ?? '',
    enabled: nextEnabled,
  };
  const applyToggle = () => {
    row.enabled = nextEnabled;
    ElMessage.success(`${nextEnabled ? '启用' : '停用'}`);
  };
  saveRagDocument(payload).then(applyToggle);
}
/**
 * Validates the parse form and submits the parse request.
 *
 * Aborts with a warning when:
 * - no document is selected;
 * - FIXED_LENGTH is chosen and size/overlap are not finite numbers (a
 *   cleared number input), or the overlap is not smaller than the chunk
 *   size — the inputs allow overlap up to 1000 with size down to 100, and
 *   such a window cannot advance through the text;
 * - DELIMITER is chosen with an empty delimiter. The delimiter is not
 *   trimmed, because whitespace such as a newline is a legitimate value.
 *
 * The request payload is unchanged: all form values are sent as before.
 * On success the dialog closes and the rows are reloaded. Request failures
 * propagate; the loading flag is always reset.
 */
async function submitParse() {
  if (parseForm.documentIds.length === 0) {
    ElMessage.warning('请选择需要解析的文档');
    return;
  }

  if (parseForm.chunkStrategy === 'FIXED_LENGTH') {
    const { chunkSize, chunkOverlap } = parseForm;
    if (!Number.isFinite(chunkSize) || !Number.isFinite(chunkOverlap)) {
      ElMessage.warning('请填写切片长度和重叠长度');
      return;
    }
    if (chunkOverlap >= chunkSize) {
      ElMessage.warning('重叠长度必须小于切片长度');
      return;
    }
  }

  if (parseForm.chunkStrategy === 'DELIMITER' && !parseForm.delimiter) {
    ElMessage.warning('请填写分隔符');
    return;
  }

  parseSubmitting.value = true;
  try {
    await parseRagDocuments({
      documentIds: parseForm.documentIds,
      chunkStrategy: parseForm.chunkStrategy,
      chunkSize: parseForm.chunkSize,
      chunkOverlap: parseForm.chunkOverlap,
      delimiter: parseForm.delimiter,
    });
    parseDialogVisible.value = false;
    ElMessage.success('解析任务已提交');
    await loadDocs();
  } finally {
    parseSubmitting.value = false;
  }
}
/**
 * Resolves a store id to its display name.
 *
 * @param storeId Store id as a string.
 * @returns The matching store's name, or '-' when no loaded store matches.
 */
function getStoreName(storeId: string) {
  for (const candidate of storeOptions.value) {
    if (String(candidate.id) === storeId) {
      return candidate.storeName;
    }
  }
  return '-';
}
/**
 * Maps a parse/index status code to its display label.
 * Note that `FAILED` is shared by both status kinds and maps to the parse wording.
 *
 * @param status Status code; may be missing.
 * @returns The label, the raw code when it is unknown, or '-' when empty.
 */
function getStatusLabel(status?: string | null) {
  if (!status) {
    return '-';
  }
  const labels: Record<string, string> = {
    UPLOADED: '已上传',
    PARSING: '解析中',
    PARSED: '已解析',
    FAILED: '解析失败',
    PENDING: '待索引',
    INDEXING: '索引中',
    INDEXED: '已索引',
  };
  return labels[status] ?? status;
}
/**
 * Picks the tag color type for a parse status.
 *
 * @param status Parse status code; may be missing.
 * @returns 'success' for PARSED, 'warning' for UPLOADED/PARSING,
 *          'danger' for FAILED, otherwise 'info'.
 */
function getParseStatusType(status?: string | null) {
  switch (status) {
    case 'PARSED':
      return 'success';
    case 'UPLOADED':
    case 'PARSING':
      return 'warning';
    case 'FAILED':
      return 'danger';
    default:
      return 'info';
  }
}
/**
 * Picks the tag color type for an index status.
 *
 * @param status Index status code; may be missing.
 * @returns 'success' for INDEXED, 'warning' for PENDING/INDEXING,
 *          'danger' for FAILED, otherwise 'info'.
 */
function getIndexStatusType(status?: string | null) {
  switch (status) {
    case 'INDEXED':
      return 'success';
    case 'PENDING':
    case 'INDEXING':
      return 'warning';
    case 'FAILED':
      return 'danger';
    default:
      return 'info';
  }
}
// Initial load. A single string `storeId` query parameter preselects the store filter
// (array or missing values are ignored); both requests then start without awaiting each other.
onMounted(() => {
const routeStoreId = route.query.storeId;
if (typeof routeStoreId === 'string') {
queryForm.storeId = routeStoreId;
}
loadStores();
loadDocs();
});
</script>
<template>
<section class="page-panel rag-doc-page">
<div class="page-panel__header">
<h2>知识文档</h2>
<span>Documents</span>
</div>
<div class="document-query-bar" data-test="document-query-bar">
<el-form class="document-query-form" data-test="document-query-form" inline>
<el-form-item label="知识库">
<el-select
v-model="queryForm.storeId"
data-test="doc-store-filter"
placeholder="请选择"
clearable
class="query-control query-control--select"
>
<el-option
v-for="store in storeOptions"
:key="String(store.id)"
:label="store.storeName"
:value="String(store.id)"
/>
</el-select>
</el-form-item>
<el-form-item label="解析状态">
<el-select
v-model="queryForm.parseStatus"
data-test="doc-parse-filter"
placeholder="请选择"
clearable
class="query-control query-control--select"
>
<el-option label="已上传" value="UPLOADED" />
<el-option label="解析中" value="PARSING" />
<el-option label="已解析" value="PARSED" />
<el-option label="解析失败" value="FAILED" />
</el-select>
</el-form-item>
<el-form-item label="索引状态">
<el-select
v-model="queryForm.indexStatus"
data-test="doc-index-filter"
placeholder="请选择"
clearable
class="query-control query-control--select"
>
<el-option label="待索引" value="PENDING" />
<el-option label="索引中" value="INDEXING" />
<el-option label="已索引" value="INDEXED" />
<el-option label="索引失败" value="FAILED" />
</el-select>
</el-form-item>
<el-form-item label="启用状态">
<el-select
v-model="queryForm.enabled"
data-test="doc-enabled-filter"
placeholder="请选择"
clearable
class="query-control query-control--select"
>
<el-option label="启用" value="true" />
<el-option label="停用" value="false" />
</el-select>
</el-form-item>
<el-form-item label="关键词">
<el-input
v-model="queryForm.keyword"
data-test="doc-keyword-input"
placeholder="搜索标题/摘要/备注"
clearable
class="query-control query-control--keyword"
@keyup.enter="handleSearch"
/>
</el-form-item>
<el-form-item class="document-query-form__actions">
<el-button data-test="doc-search" type="primary" :icon="Search" @click="handleSearch">查询</el-button>
<el-button @click="handleReset">重置</el-button>
<el-button
data-test="open-batch-parse"
:icon="Operation"
:disabled="selectedDocuments.length === 0"
@click="openBatchParseDialog"
>
批量解析
</el-button>
<el-button
data-test="open-doc-upload"
type="primary"
:icon="UploadFilled"
@click="openUploadDialog"
>
批量上传
</el-button>
</el-form-item>
</el-form>
</div>
<el-table
v-loading="loading"
:data="filteredRows"
border
stripe
style="width: 100%"
@selection-change="handleSelectionChange"
>
<el-table-column type="selection" width="48" align="center" />
<el-table-column type="index" label="编号" width="70" align="center" />
<el-table-column prop="documentTitle" label="文档标题" min-width="180" show-overflow-tooltip />
<el-table-column label="所属知识库" width="150">
<template #default="{ row }">
{{ getStoreName(row.storeId) }}
</template>
</el-table-column>
<el-table-column label="解析状态" width="110">
<template #default="{ row }">
<el-tag :type="getParseStatusType(row.parseStatus)" size="small">
{{ getStatusLabel(row.parseStatus) }}
</el-tag>
</template>
</el-table-column>
<el-table-column label="索引状态" width="110">
  <template #default="{ row }">
    <!-- FAILED is shared by parse and index statuses and getStatusLabel maps it to the parse wording, so the index column overrides it. -->
    <el-tag :type="getIndexStatusType(row.indexStatus)" size="small">
      {{ row.indexStatus === 'FAILED' ? '索引失败' : getStatusLabel(row.indexStatus) }}
    </el-tag>
  </template>
</el-table-column>
<el-table-column label="启用" width="80" align="center">
<template #default="{ row }">
<el-switch
:model-value="row.enabled ?? false"
size="small"
@change="toggleEnabled(row)"
/>
</template>
</el-table-column>
<el-table-column prop="documentSummary" label="摘要" min-width="160" show-overflow-tooltip />
<el-table-column prop="createTime" label="创建时间" width="170" />
<el-table-column label="操作" width="210" fixed="right">
<template #default="{ row }">
<el-button :data-test="`doc-parse-${row.id}`" link type="primary" @click="openParseDialog([row])">解析</el-button>
<el-button :data-test="`doc-edit-${row.id}`" link type="primary" @click="openEditDialog(row)">编辑</el-button>
<el-button link type="danger" @click="removeDoc(row)">删除</el-button>
</template>
</el-table-column>
</el-table>
<!-- Empty state shown once loading has finished and no rows match. NOTE(review): the table above is still rendered with an empty data set, so its own empty placeholder may appear as well; confirm this double empty state is intended. -->
<el-empty v-if="!loading && filteredRows.length === 0" description="暂无知识文档" />
<!-- Shared batch upload dialog. The store is locked to the store currently selected in the query form (null when none is selected); the list is reloaded through loadDocs after a successful upload. -->
<RagDocumentBatchUploadDialog
v-model="uploadDialogVisible"
:stores="storeOptions"
:locked-store-id="queryForm.storeId || null"
@uploaded="loadDocs"
/>
<!-- Parse configuration dialog. Visibility is bound to parseDialogVisible; the form edits parseForm and the primary button calls submitParse. -->
<el-dialog
v-model="parseDialogVisible"
data-test="document-parse-dialog"
title="解析配置"
width="620px"
>
<el-form :model="parseForm" label-width="112px">
<!-- Read-only count of the documents that will be parsed. -->
<el-form-item label="文档数量">
<el-tag>{{ parseForm.documentIds.length }} 个文档</el-tag>
</el-form-item>
<!-- One radio card per entry of chunkStrategyOptions (value, label, description). -->
<el-form-item label="切片方式">
<el-radio-group v-model="parseForm.chunkStrategy" class="chunk-strategy-group">
<el-radio
v-for="strategy in chunkStrategyOptions"
:key="strategy.value"
:value="strategy.value"
class="chunk-strategy-option"
>
<span class="chunk-strategy-option__label">{{ strategy.label }}</span>
<small>{{ strategy.description }}</small>
</el-radio>
</el-radio-group>
</el-form-item>
<!-- Chunk size and overlap are only shown for the FIXED_LENGTH strategy. -->
<!-- NOTE(review): the two ranges are independent (size 100-4000, overlap 0-1000), so an overlap that is not smaller than the chunk size can be entered, e.g. size 100 with overlap 1000. Confirm that submitParse or the backend rejects such a combination. -->
<el-form-item v-if="parseForm.chunkStrategy === 'FIXED_LENGTH'" label="切片长度">
<el-input-number v-model="parseForm.chunkSize" :min="100" :max="4000" :step="100" />
</el-form-item>
<el-form-item v-if="parseForm.chunkStrategy === 'FIXED_LENGTH'" label="重叠长度">
<el-input-number v-model="parseForm.chunkOverlap" :min="0" :max="1000" :step="20" />
</el-form-item>
<!-- The delimiter input is only shown for the DELIMITER strategy and is capped at 20 characters. -->
<el-form-item v-if="parseForm.chunkStrategy === 'DELIMITER'" label="分隔符">
<el-input v-model="parseForm.delimiter" maxlength="20" placeholder="如 。、换行符或自定义符号" />
</el-form-item>
</el-form>
<template #footer>
<el-button @click="parseDialogVisible = false">取消</el-button>
<el-button
data-test="document-parse-submit"
type="primary"
:loading="parseSubmitting"
@click="submitParse"
>
开始解析
</el-button>
</template>
</el-dialog>
<!-- Edit dialog: edits title, summary, enabled flag and remark of one document through editForm; "save" calls submitEdit and shows a spinner while `submitting` is true. -->
<!-- NOTE(review): the title item is marked `required`, but no `prop` or validation rules are attached in this template, so the marker looks purely visual; confirm submitEdit checks for an empty title. -->
<el-dialog v-model="editDialogVisible" title="编辑文档" width="560px">
<el-form :model="editForm" label-width="96px">
<el-form-item label="文档标题" required>
<el-input v-model="editForm.documentTitle" />
</el-form-item>
<el-form-item label="文档摘要">
<el-input v-model="editForm.documentSummary" type="textarea" :rows="3" />
</el-form-item>
<el-form-item label="启用">
<el-switch v-model="editForm.enabled" />
</el-form-item>
<el-form-item label="备注">
<el-input v-model="editForm.remark" type="textarea" :rows="2" />
</el-form-item>
</el-form>
<template #footer>
<el-button @click="editDialogVisible = false">取消</el-button>
<el-button type="primary" :loading="submitting" @click="submitEdit">保存</el-button>
</template>
</el-dialog>
</section>
</template>
<style scoped>
/* Page root: stacks the query bar and the table vertically. */
.rag-doc-page {
display: flex;
flex-direction: column;
}
/* Query bar: white strip with a bottom divider that holds the filter form. */
.document-query-bar {
padding: 18px 28px 17px;
border-bottom: 1px solid #e8edf5;
background: #ffffff;
}
/* Filter form: a single wrapping row of form items. */
.document-query-form {
display: flex;
align-items: center;
gap: 14px;
flex-wrap: wrap;
}
/* :deep() is needed below because these elements are rendered inside Element Plus child components, outside this component's scoped styles. */
.document-query-form :deep(.el-form-item) {
margin: 0;
}
.document-query-form :deep(.el-form-item__label) {
height: 38px;
padding-right: 8px;
color: #606266;
font-weight: 500;
line-height: 38px;
}
/* Inputs and selects share one height and an inset 1px border drawn with box-shadow. */
.document-query-form :deep(.el-input__wrapper),
.document-query-form :deep(.el-select__wrapper) {
min-height: 38px;
border-radius: 4px;
box-shadow: 0 0 0 1px #d8dee9 inset;
}
.document-query-form :deep(.el-input__wrapper:hover),
.document-query-form :deep(.el-select__wrapper:hover) {
box-shadow: 0 0 0 1px #b9c6d8 inset;
}
/* Fixed widths for the filter controls on wide screens. */
.query-control--select {
width: 168px;
}
.query-control--keyword {
width: 225px;
}
/* Action buttons are pushed to the right edge of the row. */
.document-query-form__actions {
margin-left: auto;
}
.document-query-form__actions :deep(.el-form-item__content) {
display: flex;
gap: 8px;
flex-wrap: wrap;
}
/* Parse dialog: chunk strategies are laid out as a two-column grid of bordered cards. */
.chunk-strategy-group {
display: grid;
grid-template-columns: repeat(2, minmax(0, 1fr));
gap: 10px;
width: 100%;
}
.chunk-strategy-option {
align-items: flex-start;
height: auto;
margin-right: 0;
padding: 10px 12px;
border: 1px solid #d8dee9;
border-radius: 4px;
}
/* Radio label holds the strategy title above its description. */
.chunk-strategy-option :deep(.el-radio__label) {
display: flex;
flex-direction: column;
gap: 4px;
line-height: 1.4;
}
.chunk-strategy-option__label {
color: #303133;
font-weight: 600;
}
.chunk-strategy-option small {
color: #7a8599;
font-size: 12px;
}
/* Narrow screens: the filter form becomes a full-width column and strategy cards collapse to one column. */
@media (max-width: 768px) {
.document-query-bar {
padding: 16px;
}
.document-query-form {
flex-direction: column;
align-items: stretch;
}
.query-control--select,
.query-control--keyword {
width: 100%;
}
.document-query-form__actions {
margin-left: 0;
}
.document-query-form__actions :deep(.el-form-item__content) {
justify-content: flex-end;
}
.chunk-strategy-group {
grid-template-columns: 1fr;
}
}
</style>

View File

@@ -2,6 +2,7 @@
import { CirclePlus, Delete, Edit, FolderAdd, Refresh, Search, UploadFilled } from '@element-plus/icons-vue';
import { ElMessage, ElMessageBox } from 'element-plus';
import { computed, onMounted, reactive, ref } from 'vue';
import { useRouter } from 'vue-router';
import {
deleteRagStore,
@@ -14,9 +15,11 @@ import {
type RagStoreOverview,
type RagStore,
} from '@/api/ragStores';
import RagDocumentBatchUploadDialog from '@/components/rag/RagDocumentBatchUploadDialog.vue';
type StoreStatus = '启用' | '停用';
const router = useRouter();
const loading = ref(false);
const detailLoading = ref(false);
const submitting = ref(false);
@@ -32,6 +35,7 @@ const queryForm = reactive({
const createDialogVisible = ref(false);
const editDialogVisible = ref(false);
const uploadDialogVisible = ref(false);
const createForm = reactive({
storeCode: '',
@@ -223,6 +227,31 @@ function showFutureMessage(actionName: string) {
ElMessage.info(`${actionName} 会在下一批接口里补齐`);
}
/**
 * Opens the batch upload dialog for the active store.
 * Shows a warning and leaves the dialog closed when no store with an id is active.
 */
function openBatchUploadDialog() {
  const hasActiveStore = Boolean(activeStore.value?.id);
  if (hasActiveStore) {
    uploadDialogVisible.value = true;
    return;
  }
  ElMessage.warning('请选择知识库');
}
/**
 * Navigates to the `rag-documents` route filtered by the active store.
 * Does nothing when no store with an id is active.
 */
function viewActiveStoreDocuments() {
  if (activeStore.value?.id) {
    // The id is stringified because it travels as a route query value.
    const storeId = String(activeStore.value.id);
    router.push({ name: 'rag-documents', query: { storeId } });
  }
}
/**
 * Refreshes the page after an upload: reloads the overview and, when a store is
 * selected, re-selects it. Both requests are started before either is awaited.
 */
async function refreshAfterUpload() {
  const overviewTask = loadOverview();
  const detailTask = activeStoreId.value ? selectStore(activeStoreId.value) : Promise.resolve();
  await Promise.all([overviewTask, detailTask]);
}
/**
 * Maps a store status to a tag type: `'success'` for the exact status '启用',
 * `'info'` for anything else (including null and undefined).
 */
function getStatusTagType(status?: string | null) {
  if (status === '启用') {
    return 'success';
  }
  return 'info';
}
@@ -314,7 +343,12 @@ onMounted(() => {
</div>
<div class="detail-actions">
<el-button :icon="Edit" @click="openEditDialog">编辑</el-button>
<el-button type="primary" :icon="UploadFilled" @click="showFutureMessage('批量导入文件')">
<el-button
data-test="store-batch-upload"
type="primary"
:icon="UploadFilled"
@click="openBatchUploadDialog"
>
批量导入文件
</el-button>
<el-button :icon="FolderAdd" @click="showFutureMessage('重建索引')">重建索引</el-button>
@@ -353,7 +387,14 @@ onMounted(() => {
<article class="detail-card detail-card--placeholder">
<div class="detail-card__header">
<h4>文档概览</h4>
<span>已对接后端聚合接口</span>
<el-button
data-test="view-store-documents"
link
type="primary"
@click="viewActiveStoreDocuments"
>
查看文档
</el-button>
</div>
<el-descriptions :column="2" border>
<el-descriptions-item label="文档总数">
@@ -456,6 +497,13 @@ onMounted(() => {
<el-button type="primary" :loading="submitting" @click="submitEditStore">保存</el-button>
</template>
</el-dialog>
<RagDocumentBatchUploadDialog
v-model="uploadDialogVisible"
:stores="activeStore ? [activeStore] : storeRows"
:locked-store-id="activeStoreId"
@uploaded="refreshAfterUpload"
/>
</section>
</template>

View File

@@ -0,0 +1,203 @@
import { flushPromises, mount } from '@vue/test-utils';
import ElementPlus from 'element-plus';
import { describe, expect, it, vi } from 'vitest';
import RagDocumentsPage from '../RagDocumentsPage.vue';
import { getRagDocumentById, parseRagDocuments, queryRagDocuments } from '@/api/ragDocuments';
import { queryRagStores } from '@/api/ragStores';
// Shared, mutable route query. It is created with vi.hoisted so it already exists when the
// hoisted vi.mock factory below runs; each test sets routeQuery.storeId before mounting.
const routeQuery = vi.hoisted(() => ({ storeId: undefined as string | undefined }));
// vue-router stub: useRoute() always returns the shared routeQuery object as `query`.
vi.mock('vue-router', () => ({
useRoute: () => ({
query: routeQuery,
}),
}));
// Store API stub: queryRagStores resolves with two fixed stores (ids '1' and '2').
vi.mock('@/api/ragStores', () => ({
queryRagStores: vi.fn(() =>
Promise.resolve({
resultcode: '0',
message: null,
data: [
{ id: '1', storeCode: 'PROD_DOC', storeName: '产品制度库', status: '启用' },
{ id: '2', storeCode: 'FAQ', storeName: 'FAQ知识库', status: '停用' },
],
}),
),
}));
// Document API stub. Every function resolves with a `{ resultcode, message, data }` envelope.
vi.mock('@/api/ragDocuments', () => ({
SOURCE_TYPE_RAG: 'RAG',
// Returns two fixed documents, narrowed to the requested store when query.storeId is set.
queryRagDocuments: vi.fn((query?: { storeId?: string }) => {
const rows = [
{
id: '11',
storeId: '1',
attachmentId: '101',
documentTitle: '产品制度总则',
documentSummary: '制度摘要',
parseStatus: 'UPLOADED',
indexStatus: 'PENDING',
enabled: true,
remark: '制度文档',
createTime: '2026-05-21 10:00:00',
},
{
id: '22',
storeId: '2',
attachmentId: '202',
documentTitle: 'FAQ 手册',
documentSummary: 'FAQ 摘要',
parseStatus: 'PARSED',
indexStatus: 'INDEXED',
enabled: false,
remark: '常见问题',
createTime: '2026-05-21 11:00:00',
},
];
const data = query?.storeId ? rows.filter((row) => row.storeId === query.storeId) : rows;
return Promise.resolve({ resultcode: '0', message: null, data });
}),
// Echoes the requested id back; every other field is the fixed FAQ document.
getRagDocumentById: vi.fn((id: string) =>
Promise.resolve({
resultcode: '0',
message: null,
data: {
id,
storeId: '2',
attachmentId: '202',
documentTitle: 'FAQ 手册',
documentSummary: 'FAQ 摘要',
enabled: false,
remark: '常见问题',
},
}),
),
saveRagDocument: vi.fn(() => Promise.resolve({ resultcode: '0', message: null, data: true })),
deleteRagDocument: vi.fn(() => Promise.resolve({ resultcode: '0', message: null, data: true })),
batchUploadRagDocuments: vi.fn(() => Promise.resolve({ resultcode: '0', message: null, data: [] })),
parseRagDocuments: vi.fn(() => Promise.resolve({ resultcode: '0', message: null, data: [] })),
}));
describe('RagDocumentsPage', () => {
  /**
   * Mounts the page with Element Plus installed and waits for the initial requests.
   *
   * Mock call history is cleared first, so a `toHaveBeenCalledWith` assertion can only be
   * satisfied by calls made in the current test, not by calls left over from an earlier one.
   * Mock implementations are kept.
   *
   * @param storeId value exposed as `route.query.storeId`; omit it for "no store in the URL".
   */
  async function mountPage(storeId?: string) {
    vi.clearAllMocks();
    routeQuery.storeId = storeId;
    const wrapper = mount(RagDocumentsPage, {
      global: {
        plugins: [ElementPlus],
      },
    });
    await flushPromises();
    return wrapper;
  }

  it('loads documents from query api', async () => {
    const wrapper = await mountPage();

    expect(queryRagStores).toHaveBeenCalled();
    expect(queryRagDocuments).toHaveBeenCalledWith({});
    expect(wrapper.text()).toContain('产品制度总则');
    expect(wrapper.text()).toContain('FAQ 手册');
  });

  it('renders document filters as a form-style query bar', async () => {
    const wrapper = await mountPage();

    expect(wrapper.find('[data-test="document-query-bar"]').exists()).toBe(true);
    expect(wrapper.find('[data-test="document-query-form"]').exists()).toBe(true);
    expect(wrapper.find('.toolbar__filters').exists()).toBe(false);
    const labels = wrapper.findAll('.document-query-form .el-form-item__label').map((label) => label.text());
    expect(labels).toEqual(expect.arrayContaining(['知识库', '解析状态', '索引状态', '启用状态']));
  });

  it('uses route storeId as the default document query', async () => {
    const wrapper = await mountPage('2');

    expect(queryRagDocuments).toHaveBeenCalledWith({ storeId: '2' });
    expect(wrapper.text()).toContain('FAQ 手册');
    expect(wrapper.text()).not.toContain('产品制度总则');
  });

  it('loads backend detail when editing a row', async () => {
    const wrapper = await mountPage();

    await wrapper.get('[data-test="doc-edit-22"]').trigger('click');
    await flushPromises();

    expect(getRagDocumentById).toHaveBeenCalledWith('22');
  });

  it('opens parse dialog with chunk strategy options from row action', async () => {
    const wrapper = await mountPage();

    await wrapper.get('[data-test="doc-parse-11"]').trigger('click');
    await flushPromises();

    expect(wrapper.find('[data-test="document-parse-dialog"]').exists()).toBe(true);
    expect(wrapper.text()).toContain('固定长度切片');
    expect(wrapper.text()).toContain('按分隔符切片');
    expect(wrapper.text()).toContain('语义切片');
  });

  it('submits parse request with selected chunk strategy', async () => {
    const wrapper = await mountPage();

    await wrapper.get('[data-test="doc-parse-11"]').trigger('click');
    await flushPromises();
    await wrapper.get('[data-test="document-parse-submit"]').trigger('click');
    await flushPromises();

    // Submitting without touching the form sends the dialog defaults.
    expect(parseRagDocuments).toHaveBeenCalledWith({
      documentIds: ['11'],
      chunkStrategy: 'FIXED_LENGTH',
      chunkSize: 800,
      chunkOverlap: 120,
      delimiter: '。',
    });
  });

  it('renders reusable upload dialog with drag upload area', async () => {
    const wrapper = await mountPage('1');

    await wrapper.get('[data-test="open-doc-upload"]').trigger('click');
    await flushPromises();

    expect(wrapper.text()).toContain('拖拽文件到此处');
    expect(wrapper.find('[data-test="batch-upload-locked-store"]').exists()).toBe(true);
  });
});

View File

@@ -11,6 +11,19 @@ import {
saveRagStore,
} from '@/api/ragStores';
// Spy standing in for router.push. It is created with vi.hoisted so it already exists when
// the hoisted vi.mock factory below runs.
const routerPush = vi.hoisted(() => vi.fn());
// vue-router stub: useRouter() returns an object whose only member is the push spy.
vi.mock('vue-router', () => ({
useRouter: () => ({
push: routerPush,
}),
}));
// Document API stub: batchUploadRagDocuments resolves with an empty data array.
vi.mock('@/api/ragDocuments', () => ({
SOURCE_TYPE_RAG: 'RAG',
batchUploadRagDocuments: vi.fn(() => Promise.resolve({ resultcode: '0', message: null, data: [] })),
}));
vi.mock('@/api/ragStores', () => ({
getRagStoreOverview: vi.fn(() =>
Promise.resolve({
@@ -181,4 +194,35 @@ describe('RagStoresPage', () => {
}),
);
});
// Clicking "view documents" must navigate to the documents route filtered by the selected store.
it('links document overview to the selected store documents page', async () => {
  const page = mount(RagStoresPage, { global: { plugins: [ElementPlus] } });
  await flushPromises();

  const viewDocumentsLink = page.get('[data-test="view-store-documents"]');
  await viewDocumentsLink.trigger('click');

  const expectedLocation = {
    name: 'rag-documents',
    query: { storeId: '1' },
  };
  expect(routerPush).toHaveBeenCalledWith(expectedLocation);
});
// Clicking "batch import" must open the shared upload dialog with the store name visible.
it('opens reusable locked upload dialog from store detail', async () => {
  const page = mount(RagStoresPage, { global: { plugins: [ElementPlus] } });
  await flushPromises();

  const uploadButton = page.get('[data-test="store-batch-upload"]');
  await uploadButton.trigger('click');
  await flushPromises();

  for (const expectedText of ['拖拽文件到此处', '产品制度库']) {
    expect(page.text()).toContain(expectedText);
  }
});
});

View File

@@ -1,12 +1,12 @@
import type { RouteRecordRaw } from 'vue-router';
import { createRouter, createWebHistory } from 'vue-router';
import DashboardPage from '@/pages/DashboardPage.vue';
import NotFoundPage from '@/pages/NotFoundPage.vue';
import RagDocumentsPage from '@/pages/RagDocumentsPage.vue';
import RagStoresPage from '@/pages/RagStoresPage.vue';
import SystemAttachmentsPage from '@/pages/SystemAttachmentsPage.vue';
import SystemEnumsPage from '@/pages/SystemEnumsPage.vue';
import DashboardPage from '@/pages/dashboard/DashboardPage.vue';
import NotFoundPage from '@/pages/common/NotFoundPage.vue';
import RagDocumentsPage from '@/pages/rag/RagDocumentsPage.vue';
import RagStoresPage from '@/pages/rag/RagStoresPage.vue';
import SystemAttachmentsPage from '@/pages/system/SystemAttachmentsPage.vue';
import SystemEnumsPage from '@/pages/system/SystemEnumsPage.vue';
import AdminLayout from '@/layouts/AdminLayout.vue';
export const routes: RouteRecordRaw[] = [

13
pom.xml
View File

@@ -29,6 +29,7 @@
<properties>
<java.version>21</java.version>
<mybatis-plus.version>3.5.16</mybatis-plus.version>
<tika.version>3.2.3</tika.version>
</properties>
<dependencies>
<dependency>
@@ -64,6 +65,18 @@
<artifactId>jackson-annotations</artifactId>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>${tika.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers-standard-package</artifactId>
<version>${tika.version}</version>
</dependency>
<dependency>
<groupId>org.springdoc</groupId>
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>

53
script/sql/rag_chunk.sql Normal file
View File

@@ -0,0 +1,53 @@
-- Destructive (re)creation of rag_chunk: an existing table and all of its rows are dropped first.
-- NOTE(review): rag_chunk_embedding declares a foreign key to rag_chunk (id). Once that table
-- exists, this DROP is expected to fail until the embedding table is dropped; confirm the run order.
DROP TABLE IF EXISTS rag_chunk;
-- One row per chunk of a parsed document.
CREATE TABLE rag_chunk (
id BIGSERIAL PRIMARY KEY,
store_id BIGINT NOT NULL,
document_id BIGINT NOT NULL,
chunk_index INTEGER NOT NULL,
chunk_content TEXT NOT NULL,
chunk_summary VARCHAR(1000) DEFAULT '',
token_count INTEGER,
page_number INTEGER,
section_title VARCHAR(255) DEFAULT '',
heading_path VARCHAR(1000) DEFAULT '',
vector_id VARCHAR(128),
metadata_json JSONB NOT NULL DEFAULT '{}'::jsonb,
enabled BOOLEAN NOT NULL DEFAULT TRUE,
version INTEGER NOT NULL DEFAULT 1,
create_time TIMESTAMP,
update_time TIMESTAMP,
remark VARCHAR(500) DEFAULT '',
create_by VARCHAR(64),
update_by VARCHAR(64),
-- A chunk index may appear only once per document.
CONSTRAINT uk_rag_chunk_document_index UNIQUE (document_id, chunk_index),
-- Neither foreign key declares an ON DELETE action, so a store or document that still has
-- chunks cannot be hard-deleted. NOTE(review): confirm document deletion removes chunks first.
CONSTRAINT fk_rag_chunk_store_id FOREIGN KEY (store_id) REFERENCES rag_store (id),
CONSTRAINT fk_rag_chunk_document_id FOREIGN KEY (document_id) REFERENCES rag_document (id)
);
CREATE INDEX idx_rag_chunk_store_id ON rag_chunk (store_id);
-- NOTE(review): the unique constraint above already creates an index that leads with
-- document_id, so this single-column index is probably redundant.
CREATE INDEX idx_rag_chunk_document_id ON rag_chunk (document_id);
CREATE INDEX idx_rag_chunk_enabled ON rag_chunk (enabled);
CREATE INDEX idx_rag_chunk_vector_id ON rag_chunk (vector_id);
-- GIN index over the JSONB metadata document.
CREATE INDEX idx_rag_chunk_metadata_json ON rag_chunk USING GIN (metadata_json);
COMMENT ON TABLE rag_chunk IS 'RAG知识切片表';
COMMENT ON COLUMN rag_chunk.id IS 'ID';
COMMENT ON COLUMN rag_chunk.store_id IS '知识库ID';
COMMENT ON COLUMN rag_chunk.document_id IS '文档ID';
COMMENT ON COLUMN rag_chunk.chunk_index IS '文档内切片序号';
COMMENT ON COLUMN rag_chunk.chunk_content IS '切片内容';
COMMENT ON COLUMN rag_chunk.chunk_summary IS '切片摘要';
COMMENT ON COLUMN rag_chunk.token_count IS 'Token数量';
COMMENT ON COLUMN rag_chunk.page_number IS '页码';
COMMENT ON COLUMN rag_chunk.section_title IS '章节标题';
COMMENT ON COLUMN rag_chunk.heading_path IS '标题路径';
COMMENT ON COLUMN rag_chunk.vector_id IS '向量ID';
COMMENT ON COLUMN rag_chunk.metadata_json IS '切片级扩展元数据';
COMMENT ON COLUMN rag_chunk.enabled IS '是否启用';
COMMENT ON COLUMN rag_chunk.version IS '版本';
COMMENT ON COLUMN rag_chunk.create_time IS '创建时间';
COMMENT ON COLUMN rag_chunk.update_time IS '更新时间';
COMMENT ON COLUMN rag_chunk.remark IS '备注';
COMMENT ON COLUMN rag_chunk.create_by IS '创建者';
COMMENT ON COLUMN rag_chunk.update_by IS '更新者';

View File

@@ -0,0 +1,50 @@
-- Requires the pgvector extension (provides the VECTOR type and the hnsw index method used below).
CREATE EXTENSION IF NOT EXISTS vector;
-- Destructive (re)creation: an existing table and all stored embeddings are dropped first.
DROP TABLE IF EXISTS rag_chunk_embedding;
-- One embedding per (chunk, embedding model).
CREATE TABLE rag_chunk_embedding (
id BIGSERIAL PRIMARY KEY,
store_id BIGINT NOT NULL,
document_id BIGINT NOT NULL,
chunk_id BIGINT NOT NULL,
embedding_model VARCHAR(100) NOT NULL,
-- NOTE(review): the vector column is fixed at 1024 dimensions while embedding_dimension is a
-- free INTEGER defaulting to 1024; nothing here forces the two to agree.
embedding_dimension INTEGER NOT NULL DEFAULT 1024,
embedding VECTOR(1024) NOT NULL,
content_hash VARCHAR(64),
enabled BOOLEAN NOT NULL DEFAULT TRUE,
version INTEGER NOT NULL DEFAULT 1,
create_time TIMESTAMP,
update_time TIMESTAMP,
remark VARCHAR(500) DEFAULT '',
create_by VARCHAR(64),
update_by VARCHAR(64),
CONSTRAINT uk_rag_chunk_embedding_chunk_model UNIQUE (chunk_id, embedding_model),
-- No ON DELETE action is declared, so referenced stores, documents and chunks cannot be
-- hard-deleted while embeddings still point at them.
CONSTRAINT fk_rag_chunk_embedding_store_id FOREIGN KEY (store_id) REFERENCES rag_store (id),
CONSTRAINT fk_rag_chunk_embedding_document_id FOREIGN KEY (document_id) REFERENCES rag_document (id),
CONSTRAINT fk_rag_chunk_embedding_chunk_id FOREIGN KEY (chunk_id) REFERENCES rag_chunk (id)
);
CREATE INDEX idx_rag_chunk_embedding_store_id ON rag_chunk_embedding (store_id);
CREATE INDEX idx_rag_chunk_embedding_document_id ON rag_chunk_embedding (document_id);
-- NOTE(review): the unique constraint above already creates an index that leads with chunk_id,
-- so this single-column index is probably redundant.
CREATE INDEX idx_rag_chunk_embedding_chunk_id ON rag_chunk_embedding (chunk_id);
CREATE INDEX idx_rag_chunk_embedding_model ON rag_chunk_embedding (embedding_model);
CREATE INDEX idx_rag_chunk_embedding_enabled ON rag_chunk_embedding (enabled);
-- HNSW index built with the cosine operator class.
-- NOTE(review): presumably similarity queries use the cosine-distance operator; confirm against the search SQL.
CREATE INDEX idx_rag_chunk_embedding_vector_hnsw
ON rag_chunk_embedding USING hnsw (embedding vector_cosine_ops);
COMMENT ON TABLE rag_chunk_embedding IS 'RAG切片向量表';
COMMENT ON COLUMN rag_chunk_embedding.id IS 'ID';
COMMENT ON COLUMN rag_chunk_embedding.store_id IS '知识库ID';
COMMENT ON COLUMN rag_chunk_embedding.document_id IS '文档ID';
COMMENT ON COLUMN rag_chunk_embedding.chunk_id IS '切片ID';
COMMENT ON COLUMN rag_chunk_embedding.embedding_model IS '向量模型';
COMMENT ON COLUMN rag_chunk_embedding.embedding_dimension IS '向量维度';
COMMENT ON COLUMN rag_chunk_embedding.embedding IS '向量内容';
COMMENT ON COLUMN rag_chunk_embedding.content_hash IS '向量生成内容哈希';
COMMENT ON COLUMN rag_chunk_embedding.enabled IS '是否启用';
COMMENT ON COLUMN rag_chunk_embedding.version IS '版本';
COMMENT ON COLUMN rag_chunk_embedding.create_time IS '创建时间';
COMMENT ON COLUMN rag_chunk_embedding.update_time IS '更新时间';
COMMENT ON COLUMN rag_chunk_embedding.remark IS '备注';
COMMENT ON COLUMN rag_chunk_embedding.create_by IS '创建者';
COMMENT ON COLUMN rag_chunk_embedding.update_by IS '更新者';

View File

@@ -0,0 +1,15 @@
-- Seeds the rag/chunk_strategy enum values. strvalue carries the code sent by the frontend
-- (FIXED_LENGTH, PARAGRAPH, ...); name is the display label.
-- The statement is re-runnable: an existing row with the same (catalog, type, name) is updated.
-- NOTE(review): the conflict target needs a unique constraint or index on exactly
-- (catalog, type, name) in sys_enum; confirm it exists. Because the key includes the display
-- name, renaming a label inserts a new row instead of updating the old one.
INSERT INTO sys_enum (catalog, type, name, value, strvalue, sort, version, remark)
VALUES
('rag', 'chunk_strategy', '固定长度切片', 1, 'FIXED_LENGTH', 1, 1, 'RAG文档切片方式'),
('rag', 'chunk_strategy', '按段落切片', 2, 'PARAGRAPH', 2, 1, 'RAG文档切片方式'),
('rag', 'chunk_strategy', '按标题层级切片', 3, 'HEADING', 3, 1, 'RAG文档切片方式'),
('rag', 'chunk_strategy', '按表格行切片', 4, 'TABLE_ROW', 4, 1, 'RAG文档切片方式'),
('rag', 'chunk_strategy', '按分隔符切片', 5, 'DELIMITER', 5, 1, 'RAG文档切片方式'),
('rag', 'chunk_strategy', '语义切片', 6, 'SEMANTIC', 6, 1, 'RAG文档切片方式')
ON CONFLICT (catalog, type, name)
-- On conflict only these columns are refreshed; version is left as stored.
DO UPDATE SET
value = EXCLUDED.value,
strvalue = EXCLUDED.strvalue,
sort = EXCLUDED.sort,
remark = EXCLUDED.remark,
update_time = CURRENT_TIMESTAMP;

View File

@@ -0,0 +1,17 @@
package com.bruce.common.config;
import com.baomidou.mybatisplus.extension.plugins.MybatisPlusInterceptor;
import com.baomidou.mybatisplus.extension.plugins.inner.OptimisticLockerInnerInterceptor;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that registers the MyBatis-Plus interceptor chain.
 */
@Configuration
public class MybatisPlusConfig {

    /**
     * Creates the MyBatis-Plus interceptor with the optimistic-locker inner interceptor installed.
     *
     * @return the interceptor bean
     */
    @Bean
    public MybatisPlusInterceptor mybatisPlusInterceptor() {
        OptimisticLockerInnerInterceptor optimisticLocker = new OptimisticLockerInnerInterceptor();
        MybatisPlusInterceptor chain = new MybatisPlusInterceptor();
        chain.addInnerInterceptor(optimisticLocker);
        return chain;
    }
}

View File

@@ -0,0 +1,21 @@
package com.bruce.common.document.parse;
import lombok.Data;
import java.nio.file.Path;
/**
 * Input handed to a {@code DocumentParser}: identifies the document and describes the file to read.
 * Lombok {@code @Data} generates the accessors, equals/hashCode and toString.
 */
@Data
public class DocumentParseContext {
// Identifier of the document being parsed.
private Long documentId;
// Identifier of the attachment that holds the file.
private Long attachmentId;
// Original file name; the Tika-based parsers pass it to Tika as the resource name.
private String originalName;
// File suffix, matched case-insensitively after trimming against values such as "pdf" or "xlsx".
private String suffix;
// Content type, matched by prefix when choosing a parser and also passed to Tika when present.
private String contentType;
// Location of the file; the Tika-based parsers reject a null path and open it with Files.newInputStream.
private Path filePath;
}

View File

@@ -0,0 +1,12 @@
package com.bruce.common.document.parse;
/**
 * Unchecked exception raised when no parser supports a document or when parsing a document fails.
 */
public class DocumentParseException extends RuntimeException {
/**
 * @param message description of the failure
 */
public DocumentParseException(String message) {
super(message);
}
/**
 * @param message description of the failure
 * @param cause   underlying exception, kept as the cause
 */
public DocumentParseException(String message, Throwable cause) {
super(message, cause);
}
}

View File

@@ -0,0 +1,20 @@
package com.bruce.common.document.parse;
import lombok.Data;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * Outcome of parsing one document. Lombok {@code @Data} generates the accessors,
 * equals/hashCode and toString.
 */
@Data
public class DocumentParseResult {
// Extracted plain text.
private String text;
// Length of the extracted text.
private Integer textLength;
// NOTE(review): the shared Tika parsing path in this change fills only text, textLength and
// metadata; confirm where pageCount and sheetCount are populated, otherwise they stay null.
private Integer pageCount;
private Integer sheetCount;
// Extra key/value details about the parse; starts as an empty, insertion-ordered map.
private Map<String, Object> metadata = new LinkedHashMap<>();
}

View File

@@ -0,0 +1,8 @@
package com.bruce.common.document.parse;
/**
 * Strategy for turning one kind of document into a {@link DocumentParseResult}.
 * DocumentParserFactory asks each registered parser whether it supports a context and
 * returns the first one that does.
 */
public interface DocumentParser {
/**
 * @param context description of the file to parse
 * @return true when this parser can handle the described file
 */
boolean supports(DocumentParseContext context);
/**
 * @param context description of the file to parse
 * @return the parse result
 */
DocumentParseResult parse(DocumentParseContext context);
}

View File

@@ -0,0 +1,37 @@
package com.bruce.common.document.parse;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;
import java.util.List;
import java.util.Locale;
/**
 * Picks the {@link DocumentParser} responsible for a given parse context.
 */
@Component
public class DocumentParserFactory {

    /** Candidate parsers, consulted in list order. */
    private final List<DocumentParser> parsers;

    /**
     * @param parsers all available parsers
     */
    public DocumentParserFactory(List<DocumentParser> parsers) {
        this.parsers = parsers;
    }

    /**
     * Returns the first parser that reports support for the context.
     *
     * @param context description of the file to parse
     * @return the first supporting parser
     * @throws DocumentParseException when no parser supports the context
     */
    public DocumentParser resolve(DocumentParseContext context) {
        for (DocumentParser candidate : parsers) {
            if (candidate.supports(context)) {
                return candidate;
            }
        }
        throw new DocumentParseException("不支持的文档类型: " + resolveType(context));
    }

    /**
     * Describes the document type for the error message: the lower-cased suffix if present,
     * otherwise the trimmed content type, otherwise "unknown".
     */
    private String resolveType(DocumentParseContext context) {
        if (context != null) {
            if (StringUtils.hasText(context.getSuffix())) {
                return context.getSuffix().trim().toLowerCase(Locale.ROOT);
            }
            if (StringUtils.hasText(context.getContentType())) {
                return context.getContentType().trim();
            }
        }
        return "unknown";
    }
}

View File

@@ -0,0 +1,64 @@
package com.bruce.common.document.parse.impl;
import com.bruce.common.document.parse.DocumentParseContext;
import com.bruce.common.document.parse.DocumentParseException;
import com.bruce.common.document.parse.DocumentParseResult;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.springframework.util.StringUtils;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.util.Locale;
import java.util.Set;
/**
 * Shared base for the Tika-backed parsers: suffix and content-type matching plus the
 * common text-extraction routine.
 */
abstract class AbstractTikaDocumentParser {

    /** Passed to Tika as the maximum text length; -1 is documented by Tika as "no limit". */
    private static final int MAX_TEXT_LENGTH = -1;

    private final Tika tika = new Tika();

    /**
     * @return true when the context has a non-blank suffix that, trimmed and lower-cased,
     *         is contained in {@code suffixes}
     */
    boolean supportsSuffix(DocumentParseContext context, Set<String> suffixes) {
        if (context == null || !StringUtils.hasText(context.getSuffix())) {
            return false;
        }
        String normalizedSuffix = context.getSuffix().trim().toLowerCase(Locale.ROOT);
        return suffixes.contains(normalizedSuffix);
    }

    /**
     * @return true when the context has a non-blank content type that, trimmed and
     *         lower-cased, starts with {@code prefix}
     */
    boolean supportsContentType(DocumentParseContext context, String prefix) {
        if (context == null || !StringUtils.hasText(context.getContentType())) {
            return false;
        }
        String normalizedType = context.getContentType().trim().toLowerCase(Locale.ROOT);
        return normalizedType.startsWith(prefix);
    }

    /**
     * Extracts the text of the file referenced by the context.
     *
     * @param context description of the file; its file path must be set
     * @return result holding the trimmed text, its length, and the "contentType" and
     *         "resourceName" metadata entries
     * @throws DocumentParseException when the context or file path is missing, or when
     *                                reading or parsing the file fails
     */
    DocumentParseResult parseWithTika(DocumentParseContext context) {
        if (context == null || context.getFilePath() == null) {
            throw new DocumentParseException("解析文件不能为空");
        }

        // Give Tika the file name and, when known, the content type before parsing.
        Metadata tikaMetadata = new Metadata();
        tikaMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, context.getOriginalName());
        if (StringUtils.hasText(context.getContentType())) {
            tikaMetadata.set(Metadata.CONTENT_TYPE, context.getContentType());
        }

        String extracted;
        try (InputStream source = Files.newInputStream(context.getFilePath())) {
            extracted = tika.parseToString(source, tikaMetadata, MAX_TEXT_LENGTH);
        } catch (IOException | TikaException e) {
            throw new DocumentParseException("文档解析失败: " + e.getMessage(), e);
        }

        String normalizedText = extracted == null ? "" : extracted.trim();
        DocumentParseResult parsed = new DocumentParseResult();
        parsed.setText(normalizedText);
        parsed.setTextLength(parsed.getText().length());
        // Prefer the values left in the Tika metadata after parsing; fall back to the context's.
        parsed.getMetadata().put("contentType",
                firstNonBlank(tikaMetadata.get(Metadata.CONTENT_TYPE), context.getContentType()));
        parsed.getMetadata().put("resourceName",
                firstNonBlank(tikaMetadata.get(TikaCoreProperties.RESOURCE_NAME_KEY), context.getOriginalName()));
        return parsed;
    }

    /** Returns {@code first} when it has text, otherwise {@code fallback} (which may be null). */
    private String firstNonBlank(String first, String fallback) {
        if (StringUtils.hasText(first)) {
            return first;
        }
        return fallback;
    }
}

View File

@@ -0,0 +1,26 @@
package com.bruce.common.document.parse.impl;
import com.bruce.common.document.parse.DocumentParseContext;
import com.bruce.common.document.parse.DocumentParser;
import com.bruce.common.document.parse.DocumentParseResult;
import org.springframework.stereotype.Component;
import java.util.Set;
/**
 * Parser for Excel workbooks, selected by the "xls"/"xlsx" suffix or by an Excel content type.
 */
@Component
public class ExcelDocumentParser extends AbstractTikaDocumentParser implements DocumentParser {

    private static final Set<String> SUFFIXES = Set.of("xls", "xlsx");

    /** Checks the suffix first, then the legacy and OOXML spreadsheet content-type prefixes. */
    @Override
    public boolean supports(DocumentParseContext context) {
        if (supportsSuffix(context, SUFFIXES)) {
            return true;
        }
        if (supportsContentType(context, "application/vnd.ms-excel")) {
            return true;
        }
        return supportsContentType(context, "application/vnd.openxmlformats-officedocument.spreadsheetml");
    }

    /** Delegates to the shared Tika extraction. */
    @Override
    public DocumentParseResult parse(DocumentParseContext context) {
        return parseWithTika(context);
    }
}

View File

@@ -0,0 +1,24 @@
package com.bruce.common.document.parse.impl;
import com.bruce.common.document.parse.DocumentParseContext;
import com.bruce.common.document.parse.DocumentParser;
import com.bruce.common.document.parse.DocumentParseResult;
import org.springframework.stereotype.Component;
import java.util.Set;
/**
 * Parser for PDF files, selected by the "pdf" suffix or the application/pdf content type.
 */
@Component
public class PdfDocumentParser extends AbstractTikaDocumentParser implements DocumentParser {

    private static final Set<String> SUFFIXES = Set.of("pdf");

    /** Checks the suffix first, then the content-type prefix. */
    @Override
    public boolean supports(DocumentParseContext context) {
        if (supportsSuffix(context, SUFFIXES)) {
            return true;
        }
        return supportsContentType(context, "application/pdf");
    }

    /** Delegates to the shared Tika extraction. */
    @Override
    public DocumentParseResult parse(DocumentParseContext context) {
        return parseWithTika(context);
    }
}

View File

@@ -0,0 +1,24 @@
package com.bruce.common.document.parse.impl;
import com.bruce.common.document.parse.DocumentParseContext;
import com.bruce.common.document.parse.DocumentParser;
import com.bruce.common.document.parse.DocumentParseResult;
import org.springframework.stereotype.Component;
import java.util.Set;
/**
 * Parser for plain-text files, selected by the "txt"/"md"/"log" suffix or by any
 * content type starting with "text/".
 */
@Component
public class TxtDocumentParser extends AbstractTikaDocumentParser implements DocumentParser {

    private static final Set<String> SUFFIXES = Set.of("txt", "md", "log");

    /** Checks the suffix first, then the "text/" content-type prefix. */
    @Override
    public boolean supports(DocumentParseContext context) {
        if (supportsSuffix(context, SUFFIXES)) {
            return true;
        }
        return supportsContentType(context, "text/");
    }

    /** Delegates to the shared Tika extraction. */
    @Override
    public DocumentParseResult parse(DocumentParseContext context) {
        return parseWithTika(context);
    }
}

View File

@@ -0,0 +1,26 @@
package com.bruce.common.document.parse.impl;
import com.bruce.common.document.parse.DocumentParseContext;
import com.bruce.common.document.parse.DocumentParser;
import com.bruce.common.document.parse.DocumentParseResult;
import org.springframework.stereotype.Component;
import java.util.Set;
/**
 * Parser for Word documents, selected by the "doc"/"docx" suffix or by a Word content type.
 */
@Component
public class WordDocumentParser extends AbstractTikaDocumentParser implements DocumentParser {

    private static final Set<String> SUFFIXES = Set.of("doc", "docx");

    /** Checks the suffix first, then the legacy and OOXML word-processing content-type prefixes. */
    @Override
    public boolean supports(DocumentParseContext context) {
        if (supportsSuffix(context, SUFFIXES)) {
            return true;
        }
        if (supportsContentType(context, "application/msword")) {
            return true;
        }
        return supportsContentType(context, "application/vnd.openxmlformats-officedocument.wordprocessingml");
    }

    /** Delegates to the shared Tika extraction. */
    @Override
    public DocumentParseResult parse(DocumentParseContext context) {
        return parseWithTika(context);
    }
}

View File

@@ -2,9 +2,12 @@ package com.bruce.rag.controller;
import com.bruce.common.domain.model.RequestResult;
import com.bruce.rag.dto.request.RagDocumentBatchUploadRequest;
import com.bruce.rag.dto.request.RagDocumentParseRequest;
import com.bruce.rag.dto.request.RagDocumentQueryRequest;
import com.bruce.rag.dto.request.RagDocumentSaveRequest;
import com.bruce.rag.dto.response.RagDocumentParseResponse;
import com.bruce.rag.dto.response.RagDocumentResponse;
import com.bruce.rag.service.IRagDocumentParseService;
import com.bruce.rag.service.IRagDocumentService;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@@ -29,6 +32,9 @@ public class RagDocumentController {
@Autowired
private IRagDocumentService ragDocumentService;
@Autowired
private IRagDocumentParseService ragDocumentParseService;
@Operation(summary = "查询全部知识库文档")
@PostMapping("/list")
public RequestResult<List<RagDocumentResponse>> list() {
@@ -85,4 +91,13 @@ public class RagDocumentController {
request.getStoreId(), responses.size());
return RequestResult.success(responses);
}
/**
 * Parses the requested documents through the parse service and returns one response per result.
 * The request and the number of results are logged at INFO level.
 *
 * @param request document ids and chunking options
 * @return success envelope wrapping the parse responses
 */
@Operation(summary = "解析知识库文档")
@PostMapping("/parse")
public RequestResult<List<RagDocumentParseResponse>> parse(@RequestBody RagDocumentParseRequest request) {
    log.info("RagDocumentController.parse start, request={}", request);
    List<RagDocumentParseResponse> parsed = ragDocumentParseService.parse(request);
    int parsedCount = parsed.size();
    log.info("RagDocumentController.parse success, count={}", parsedCount);
    return RequestResult.success(parsed);
}
}

View File

@@ -0,0 +1,26 @@
package com.bruce.rag.dto.request;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import java.util.List;
/**
 * Request body of the document parse endpoint: which documents to parse and how to chunk them.
 * NOTE(review): no validation annotations are declared here; confirm the parse service rejects
 * a null or empty id list and inconsistent chunk settings.
 */
@Data
@Schema(description = "RAG知识库文档解析请求")
public class RagDocumentParseRequest {
// Ids of the documents to parse.
@Schema(description = "文档ID列表")
private List<Long> documentIds;
// Chunking strategy code. It is a plain String here, so this class does not restrict the value;
// presumably one of the rag/chunk_strategy codes such as FIXED_LENGTH - TODO confirm.
@Schema(description = "切片方式")
private String chunkStrategy;
// Chunk length.
@Schema(description = "切片长度")
private Integer chunkSize;
// Overlap length between chunks.
@Schema(description = "重叠长度")
private Integer chunkOverlap;
// Delimiter used to split the text.
@Schema(description = "分隔符")
private String delimiter;
}

View File

@@ -0,0 +1,33 @@
package com.bruce.rag.dto.response;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import com.fasterxml.jackson.databind.ser.std.ToStringSerializer;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * Result of parsing one document, as returned by the parse endpoint.
 */
@Data
@Schema(description = "RAG知识库文档解析响应")
public class RagDocumentParseResponse {
// Serialized with ToStringSerializer, so the id is written to JSON as a string, not a number.
@Schema(description = "文档ID")
@JsonSerialize(using = ToStringSerializer.class)
private Long documentId;
// Parse status of the document.
@Schema(description = "解析状态")
private String parseStatus;
// Length of the extracted text.
@Schema(description = "文本长度")
private Integer textLength;
// Number of pages.
@Schema(description = "页数")
private Integer pageCount;
// Number of worksheets.
@Schema(description = "工作表数量")
private Integer sheetCount;
// Parse metadata; starts as an empty, insertion-ordered map.
@Schema(description = "解析元数据")
private Map<String, Object> metadata = new LinkedHashMap<>();
}

View File

@@ -0,0 +1,67 @@
package com.bruce.rag.entity;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableName;
import com.bruce.common.domain.model.BaseEntity;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
/**
 * Entity for one knowledge chunk of a RAG document, mapped to the {@code rag_chunk} table.
 *
 * <p>Extends {@code BaseEntity}; equality includes the superclass fields
 * ({@code callSuper = true}).
 */
@Data
@NoArgsConstructor
@EqualsAndHashCode(callSuper = true)
@TableName("rag_chunk")
@Schema(description = "RAG知识切片")
public class RagChunk extends BaseEntity {
    /** ID of the knowledge store the chunk belongs to. */
    @Schema(description = "知识库ID")
    @TableField("store_id")
    private Long storeId;
    /** ID of the document the chunk was cut from. */
    @Schema(description = "文档ID")
    @TableField("document_id")
    private Long documentId;
    /** Sequence number of the chunk within its document. */
    @Schema(description = "文档内切片序号")
    @TableField("chunk_index")
    private Integer chunkIndex;
    /** Chunk text. */
    @Schema(description = "切片内容")
    @TableField("chunk_content")
    private String chunkContent;
    /** Summary of the chunk. */
    @Schema(description = "切片摘要")
    @TableField("chunk_summary")
    private String chunkSummary;
    /** Token count of the chunk. */
    @Schema(description = "Token数量")
    @TableField("token_count")
    private Integer tokenCount;
    /** Page number. */
    @Schema(description = "页码")
    @TableField("page_number")
    private Integer pageNumber;
    /** Section title. */
    @Schema(description = "章节标题")
    @TableField("section_title")
    private String sectionTitle;
    /** Heading path. */
    @Schema(description = "标题路径")
    @TableField("heading_path")
    private String headingPath;
    /** Vector ID. */
    @Schema(description = "向量ID")
    @TableField("vector_id")
    private String vectorId;
    /** Chunk-level extension metadata, held as a JSON string. */
    @Schema(description = "切片级扩展元数据JSON")
    @TableField("metadata_json")
    private String metadataJson;
    /** Whether the chunk is enabled. */
    @Schema(description = "是否启用")
    private Boolean enabled;
    /** Free-form remark. */
    @Schema(description = "备注")
    private String remark;
}

View File

@@ -0,0 +1,50 @@
package com.bruce.rag.entity;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableName;
import com.bruce.common.domain.model.BaseEntity;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
/**
 * Entity for the embedding vector of a RAG chunk, mapped to the {@code rag_chunk_embedding} table.
 *
 * <p>Extends {@code BaseEntity}; equality includes the superclass fields
 * ({@code callSuper = true}).
 */
@Data
@NoArgsConstructor
@EqualsAndHashCode(callSuper = true)
@TableName("rag_chunk_embedding")
@Schema(description = "RAG切片向量")
public class RagChunkEmbedding extends BaseEntity {
    /** ID of the knowledge store. */
    @Schema(description = "知识库ID")
    @TableField("store_id")
    private Long storeId;
    /** ID of the document. */
    @Schema(description = "文档ID")
    @TableField("document_id")
    private Long documentId;
    /** ID of the chunk this embedding was generated for. */
    @Schema(description = "切片ID")
    @TableField("chunk_id")
    private Long chunkId;
    /** Embedding model. */
    @Schema(description = "向量模型")
    @TableField("embedding_model")
    private String embeddingModel;
    /** Embedding dimension. */
    @Schema(description = "向量维度")
    @TableField("embedding_dimension")
    private Integer embeddingDimension;
    /** Vector content, held as a string on the Java side. NOTE(review): the column type/encoding is not visible here — confirm. */
    @Schema(description = "向量内容")
    private String embedding;
    /** Hash of the content the vector was generated from. */
    @Schema(description = "向量生成内容哈希")
    @TableField("content_hash")
    private String contentHash;
    /** Whether the embedding is enabled. */
    @Schema(description = "是否启用")
    private Boolean enabled;
    /** Free-form remark. */
    @Schema(description = "备注")
    private String remark;
}

View File

@@ -0,0 +1,20 @@
package com.bruce.rag.enums;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
 * Chunking strategies for RAG documents.
 *
 * <p>Each constant carries a numeric {@code value} and a display {@code label};
 * Lombok generates the getters and the all-args constructor.
 */
@Getter
@AllArgsConstructor
public enum RagChunkStrategyEnum {
    /** Fixed-length chunking. */
    FIXED_LENGTH(1, "固定长度切片"),
    /** Chunking by paragraph. */
    PARAGRAPH(2, "按段落切片"),
    /** Chunking by heading hierarchy. */
    HEADING(3, "按标题层级切片"),
    /** Chunking by table row. */
    TABLE_ROW(4, "按表格行切片"),
    /** Chunking by delimiter. */
    DELIMITER(5, "按分隔符切片"),
    /** Semantic chunking. */
    SEMANTIC(6, "语义切片");
    /** Numeric code of the strategy. */
    private final Integer value;
    /** Display label of the strategy. */
    private final String label;
}

View File

@@ -0,0 +1,9 @@
package com.bruce.rag.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.bruce.rag.entity.RagChunkEmbedding;
import org.apache.ibatis.annotations.Mapper;
/**
 * MyBatis mapper for {@link RagChunkEmbedding}; declares no methods of its own and
 * relies on those inherited from {@link BaseMapper}.
 */
@Mapper
public interface RagChunkEmbeddingMapper extends BaseMapper<RagChunkEmbedding> {
}

View File

@@ -0,0 +1,9 @@
package com.bruce.rag.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.bruce.rag.entity.RagChunk;
import org.apache.ibatis.annotations.Mapper;
/**
 * MyBatis mapper for {@link RagChunk}; declares no methods of its own and
 * relies on those inherited from {@link BaseMapper}.
 */
@Mapper
public interface RagChunkMapper extends BaseMapper<RagChunk> {
}

View File

@@ -0,0 +1,7 @@
package com.bruce.rag.service;
import com.baomidou.mybatisplus.extension.service.IService;
import com.bruce.rag.entity.RagChunkEmbedding;
/**
 * Service contract for {@link RagChunkEmbedding}; adds nothing beyond the
 * operations inherited from {@link IService}.
 */
public interface IRagChunkEmbeddingService extends IService<RagChunkEmbedding> {
}

View File

@@ -0,0 +1,7 @@
package com.bruce.rag.service;
import com.baomidou.mybatisplus.extension.service.IService;
import com.bruce.rag.entity.RagChunk;
/**
 * Service contract for {@link RagChunk}; adds nothing beyond the
 * operations inherited from {@link IService}.
 */
public interface IRagChunkService extends IService<RagChunk> {
}

View File

@@ -0,0 +1,13 @@
package com.bruce.rag.service;
import com.bruce.rag.dto.response.RagDocumentParseResponse;
import com.bruce.rag.dto.request.RagDocumentParseRequest;
import java.util.List;
/**
 * Contract for parsing RAG knowledge-base documents, one at a time or as a batch.
 */
public interface IRagDocumentParseService {
    /**
     * Parses a single document.
     *
     * @param documentId ID of the document to parse
     * @return parse outcome for that document
     */
    RagDocumentParseResponse parse(Long documentId);

    /**
     * Parses the documents listed in a batch request.
     *
     * @param request batch request carrying the document IDs and chunk settings
     * @return parse outcomes for the requested documents
     */
    List<RagDocumentParseResponse> parse(RagDocumentParseRequest request);
}

View File

@@ -0,0 +1,12 @@
package com.bruce.rag.service.impl;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.bruce.rag.entity.RagChunkEmbedding;
import com.bruce.rag.mapper.RagChunkEmbeddingMapper;
import com.bruce.rag.service.IRagChunkEmbeddingService;
import org.springframework.stereotype.Service;
/**
 * Spring service for {@link RagChunkEmbedding}; all behavior is inherited from
 * {@link ServiceImpl} backed by {@link RagChunkEmbeddingMapper}.
 */
@Service
public class RagChunkEmbeddingServiceImpl extends ServiceImpl<RagChunkEmbeddingMapper, RagChunkEmbedding>
        implements IRagChunkEmbeddingService {
}

View File

@@ -0,0 +1,11 @@
package com.bruce.rag.service.impl;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.bruce.rag.entity.RagChunk;
import com.bruce.rag.mapper.RagChunkMapper;
import com.bruce.rag.service.IRagChunkService;
import org.springframework.stereotype.Service;
/**
 * Spring service for {@link RagChunk}; all behavior is inherited from
 * {@link ServiceImpl} backed by {@link RagChunkMapper}.
 */
@Service
public class RagChunkServiceImpl extends ServiceImpl<RagChunkMapper, RagChunk> implements IRagChunkService {
}

View File

@@ -0,0 +1,151 @@
package com.bruce.rag.service.impl;
import com.bruce.common.config.AttachmentProperties;
import com.bruce.common.document.parse.DocumentParseContext;
import com.bruce.common.document.parse.DocumentParseException;
import com.bruce.common.document.parse.DocumentParseResult;
import com.bruce.common.document.parse.DocumentParser;
import com.bruce.common.document.parse.DocumentParserFactory;
import com.bruce.common.domain.entity.SysAttachment;
import com.bruce.common.service.ISysAttachmentService;
import com.bruce.rag.dto.request.RagDocumentParseRequest;
import com.bruce.rag.dto.response.RagDocumentParseResponse;
import com.bruce.rag.entity.RagDocument;
import com.bruce.rag.enums.RagChunkStrategyEnum;
import com.bruce.rag.enums.RagParseStatusEnum;
import com.bruce.rag.service.IRagDocumentParseService;
import com.bruce.rag.service.IRagDocumentService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.util.StringUtils;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * Parses the attachment file behind a RAG document and records the outcome on the document row.
 *
 * <p>Each parse sets the document's {@code parseStatus} to {@code PARSING} and then to
 * {@code PARSED} or {@code FAILED}. For batch requests only the document ID list and the
 * chunk strategy name are validated here; chunk size, overlap and delimiter are not read
 * by this class.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class RagDocumentParseServiceImpl implements IRagDocumentParseService {

    /** Names of all {@link RagChunkStrategyEnum} constants, computed once instead of per request. */
    private static final Set<String> SUPPORTED_CHUNK_STRATEGIES = Arrays.stream(RagChunkStrategyEnum.values())
            .map(Enum::name)
            .collect(Collectors.toUnmodifiableSet());

    private final IRagDocumentService ragDocumentService;
    private final ISysAttachmentService sysAttachmentService;
    private final AttachmentProperties attachmentProperties;
    private final DocumentParserFactory documentParserFactory;

    /**
     * Parses every document listed in the request, in list order.
     *
     * <p>The first document that fails aborts the batch by propagating its exception;
     * documents parsed before it keep their updated status.
     *
     * @param request batch parse request
     * @return one response per requested document ID
     * @throws IllegalArgumentException if the request is null, has no document IDs, contains a
     *                                  null ID, or names an unsupported chunk strategy
     */
    @Override
    public List<RagDocumentParseResponse> parse(RagDocumentParseRequest request) {
        log.info("RagDocumentParseServiceImpl.parse batch start, request={}", request);
        validateParseRequest(request);
        List<RagDocumentParseResponse> responses = request.getDocumentIds().stream()
                .map(this::parse)
                .toList();
        log.info("RagDocumentParseServiceImpl.parse batch success, count={}", responses.size());
        return responses;
    }

    /**
     * Parses one document's attachment and updates the document's parse status.
     *
     * @param documentId ID of the document to parse
     * @return parse outcome with status {@code PARSED}
     * @throws IllegalArgumentException if the ID is null, or the document, its attachment ID or
     *                                  the attachment row is missing
     * @throws RuntimeException         whatever the path resolution, parser lookup, parser or
     *                                  status update throws; the document is marked
     *                                  {@code FAILED} on a best-effort basis before rethrowing
     */
    @Override
    public RagDocumentParseResponse parse(Long documentId) {
        log.info("RagDocumentParseServiceImpl.parse start, documentId={}", documentId);
        if (documentId == null) {
            throw new IllegalArgumentException("文档ID不能为空");
        }
        RagDocument document = ragDocumentService.getById(documentId);
        if (document == null) {
            throw new IllegalArgumentException("文档不存在ID: " + documentId);
        }
        if (document.getAttachmentId() == null) {
            throw new IllegalArgumentException("文档附件ID不能为空");
        }
        SysAttachment attachment = sysAttachmentService.getById(document.getAttachmentId());
        if (attachment == null) {
            throw new IllegalArgumentException("附件不存在ID: " + document.getAttachmentId());
        }
        updateParseStatus(documentId, RagParseStatusEnum.PARSING, null);
        try {
            DocumentParseContext context = buildParseContext(document, attachment);
            DocumentParser parser = documentParserFactory.resolve(context);
            DocumentParseResult result = parser.parse(context);
            updateParseStatus(documentId, RagParseStatusEnum.PARSED, null);
            RagDocumentParseResponse response = toResponse(documentId, result);
            log.info("RagDocumentParseServiceImpl.parse success, documentId={}, textLength={}",
                    documentId, response.getTextLength());
            return response;
        } catch (RuntimeException e) {
            markFailed(documentId, e);
            log.warn("RagDocumentParseServiceImpl.parse failed, documentId={}, message={}", documentId, e.getMessage());
            throw e;
        }
    }

    /**
     * Records the FAILED status without letting a failure of that write replace the
     * exception that caused the parse to fail.
     */
    private void markFailed(Long documentId, RuntimeException cause) {
        try {
            updateParseStatus(documentId, RagParseStatusEnum.FAILED, cause.getMessage());
        } catch (RuntimeException statusUpdateFailure) {
            // Keep the root cause as the thrown exception; attach the secondary failure to it.
            cause.addSuppressed(statusUpdateFailure);
            log.error("RagDocumentParseServiceImpl.parse could not persist FAILED status, documentId={}",
                    documentId, statusUpdateFailure);
        }
    }

    /**
     * Rejects malformed batch requests before any document is touched.
     *
     * @throws IllegalArgumentException on a null request, a missing/empty ID list, a null ID
     *                                  or an unknown chunk strategy name
     */
    private void validateParseRequest(RagDocumentParseRequest request) {
        if (request == null) {
            throw new IllegalArgumentException("解析请求不能为空");
        }
        if (request.getDocumentIds() == null || request.getDocumentIds().isEmpty()) {
            throw new IllegalArgumentException("文档ID列表不能为空");
        }
        // Fail fast on null IDs: otherwise the same error surfaces mid-batch, after earlier
        // documents have already been parsed and had their status changed.
        if (request.getDocumentIds().stream().anyMatch(id -> id == null)) {
            throw new IllegalArgumentException("文档ID不能为空");
        }
        // The null check must come first: the unmodifiable set rejects contains(null).
        if (request.getChunkStrategy() == null
                || !SUPPORTED_CHUNK_STRATEGIES.contains(request.getChunkStrategy())) {
            throw new IllegalArgumentException("不支持的切片方式: " + request.getChunkStrategy());
        }
    }

    /**
     * Builds the parser input from the document and attachment rows.
     *
     * @throws DocumentParseException if the resolved path is not an existing regular file
     */
    private DocumentParseContext buildParseContext(RagDocument document, SysAttachment attachment) {
        Path filePath = resolveFilePath(attachment);
        if (!Files.isRegularFile(filePath)) {
            throw new DocumentParseException("解析文件不存在: " + filePath);
        }
        DocumentParseContext context = new DocumentParseContext();
        context.setDocumentId(document.getId());
        context.setAttachmentId(attachment.getId());
        context.setOriginalName(attachment.getOriginalName());
        context.setSuffix(attachment.getFileSuffix());
        context.setContentType(attachment.getContentType());
        context.setFilePath(filePath);
        return context;
    }

    /**
     * Resolves the attachment's stored path: absolute paths are used as-is, relative paths
     * are resolved against the configured attachment base path. Both are normalized.
     *
     * @throws DocumentParseException if the attachment has no file path
     */
    private Path resolveFilePath(SysAttachment attachment) {
        if (!StringUtils.hasText(attachment.getFilePath())) {
            throw new DocumentParseException("附件文件路径不能为空");
        }
        Path filePath = Path.of(attachment.getFilePath());
        if (filePath.isAbsolute()) {
            return filePath.normalize();
        }
        return Path.of(attachmentProperties.getBasePath()).resolve(filePath).normalize();
    }

    /**
     * Writes the parse status (and error message, when it has text) for one document via
     * {@code updateById} on an entity that carries only the ID and the changed fields.
     */
    private void updateParseStatus(Long documentId, RagParseStatusEnum status, String errorMessage) {
        RagDocument update = new RagDocument();
        update.setId(documentId);
        update.setParseStatus(status.name());
        // NOTE(review): if RagDocument uses MyBatis-Plus' default field strategy, updateById
        // skips null properties, so a null here presumably leaves an earlier failure message
        // in place after a successful re-parse — confirm against the entity mapping.
        update.setErrorMessage(StringUtils.hasText(errorMessage) ? errorMessage : null);
        ragDocumentService.updateById(update);
    }

    /** Copies the parser result into the API response, with status {@code PARSED}. */
    private RagDocumentParseResponse toResponse(Long documentId, DocumentParseResult result) {
        RagDocumentParseResponse response = new RagDocumentParseResponse();
        response.setDocumentId(documentId);
        response.setParseStatus(RagParseStatusEnum.PARSED.name());
        response.setTextLength(result.getTextLength());
        response.setPageCount(result.getPageCount());
        response.setSheetCount(result.getSheetCount());
        response.setMetadata(result.getMetadata());
        return response;
    }
}

View File

@@ -82,10 +82,18 @@ public class RagDocumentServiceImpl extends ServiceImpl<RagDocumentMapper, RagDo
document.setIndexStatus(RagIndexStatusEnum.PENDING.name());
}
if (request.getStoreId() != null) {
document.setStoreId(request.getStoreId());
}
if (request.getAttachmentId() != null) {
document.setAttachmentId(request.getAttachmentId());
}
if (StringUtils.hasText(request.getDocumentTitle())) {
document.setDocumentTitle(request.getDocumentTitle().trim());
}
if (request.getDocumentSummary() != null) {
document.setDocumentSummary(trimToNull(request.getDocumentSummary()));
}
if (StringUtils.hasText(request.getParseStatus())) {
document.setParseStatus(request.getParseStatus().trim());
}
@@ -95,10 +103,14 @@ public class RagDocumentServiceImpl extends ServiceImpl<RagDocumentMapper, RagDo
if (request.getEnabled() != null) {
document.setEnabled(request.getEnabled());
}
if (request.getErrorMessage() != null) {
document.setErrorMessage(trimToNull(request.getErrorMessage()));
}
if (request.getRemark() != null) {
document.setRemark(trimToNull(request.getRemark()));
}
boolean result = saveOrUpdate(document);
boolean result = request.getId() == null ? save(document) : updateById(document);
log.info("RagDocumentServiceImpl.saveOrUpdate success, requestId={}, savedId={}, result={}",
request.getId(), document.getId(), result);
return result;

View File

@@ -0,0 +1,19 @@
package com.bruce.common.config;
import com.baomidou.mybatisplus.extension.plugins.inner.OptimisticLockerInnerInterceptor;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Checks the interceptor chain produced by {@link MybatisPlusConfig}.
 */
class MybatisPlusConfigTests {

    /** The configured interceptor must include an optimistic-locker inner interceptor. */
    @Test
    void mybatisPlusInterceptorShouldRegisterOptimisticLocker() {
        var innerInterceptors = new MybatisPlusConfig().mybatisPlusInterceptor().getInterceptors();

        boolean optimisticLockerRegistered = innerInterceptors.stream()
                .anyMatch(OptimisticLockerInnerInterceptor.class::isInstance);

        assertTrue(optimisticLockerRegistered);
    }
}

View File

@@ -0,0 +1,50 @@
package com.bruce.common.document.parse;
import com.bruce.common.document.parse.impl.ExcelDocumentParser;
import com.bruce.common.document.parse.impl.PdfDocumentParser;
import com.bruce.common.document.parse.impl.TxtDocumentParser;
import com.bruce.common.document.parse.impl.WordDocumentParser;
import org.junit.jupiter.api.Test;
import java.nio.file.Path;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
/**
 * Verifies how {@link DocumentParserFactory} picks a parser for a file suffix.
 */
class DocumentParserFactoryTests {

    /** Each known suffix resolves to its dedicated parser implementation. */
    @Test
    void resolveShouldChooseParserByFileSuffix() {
        DocumentParserFactory allParsers = new DocumentParserFactory(List.of(
                new TxtDocumentParser(),
                new WordDocumentParser(),
                new PdfDocumentParser(),
                new ExcelDocumentParser()
        ));

        assertResolvedType(allParsers, "txt", TxtDocumentParser.class);
        assertResolvedType(allParsers, "docx", WordDocumentParser.class);
        assertResolvedType(allParsers, "pdf", PdfDocumentParser.class);
        assertResolvedType(allParsers, "xlsx", ExcelDocumentParser.class);
    }

    /** A suffix no registered parser supports is rejected with a descriptive message. */
    @Test
    void resolveShouldRejectUnsupportedSuffix() {
        DocumentParserFactory txtOnly = new DocumentParserFactory(List.of(new TxtDocumentParser()));
        DocumentParseContext zipContext = context("zip");

        DocumentParseException rejected = assertThrows(
                DocumentParseException.class,
                () -> txtOnly.resolve(zipContext)
        );

        assertEquals("不支持的文档类型: zip", rejected.getMessage());
    }

    /** Asserts that the factory resolves the given suffix to exactly the expected parser class. */
    private void assertResolvedType(DocumentParserFactory factory, String suffix, Class<?> expectedType) {
        assertEquals(expectedType, factory.resolve(context(suffix)).getClass());
    }

    /** Builds a context carrying only the suffix and a matching {@code sample.<suffix>} path. */
    private DocumentParseContext context(String suffix) {
        DocumentParseContext parseContext = new DocumentParseContext();
        parseContext.setSuffix(suffix);
        parseContext.setFilePath(Path.of("sample." + suffix));
        return parseContext;
    }
}

View File

@@ -0,0 +1,47 @@
package com.bruce.common.document.parse;
import com.bruce.common.document.parse.impl.TxtDocumentParser;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Exercises {@link TxtDocumentParser}: reading a UTF-8 text file and matching by
 * suffix or content type.
 */
class TxtDocumentParserTests {

    @TempDir
    private Path tempDir;

    /** Parsing returns the file content verbatim, its length, and a text/plain content type. */
    @Test
    void parseShouldReadPlainTextContent() throws Exception {
        Path textFile = Files.writeString(
                tempDir.resolve("people.txt"),
                "张三 是 产品经理\n李四 是 后端工程师",
                StandardCharsets.UTF_8);

        DocumentParseContext parseContext = new DocumentParseContext();
        parseContext.setOriginalName("people.txt");
        parseContext.setSuffix("txt");
        parseContext.setContentType("text/plain");
        parseContext.setFilePath(textFile);

        TxtDocumentParser txtParser = new TxtDocumentParser();
        DocumentParseResult result = txtParser.parse(parseContext);

        assertEquals("张三 是 产品经理\n李四 是 后端工程师", result.getText());
        assertEquals(result.getText().length(), result.getTextLength());
        String reportedContentType = result.getMetadata().get("contentType").toString();
        assertTrue(reportedContentType.startsWith("text/plain"));
    }

    /** Either an upper-case TXT suffix or a text/plain content type alone is enough to match. */
    @Test
    void supportsShouldAcceptTextSuffixAndContentType() {
        DocumentParseContext bySuffix = new DocumentParseContext();
        bySuffix.setSuffix("TXT");

        DocumentParseContext byContentType = new DocumentParseContext();
        byContentType.setContentType("text/plain");

        TxtDocumentParser txtParser = new TxtDocumentParser();
        assertTrue(txtParser.supports(bySuffix));
        assertTrue(txtParser.supports(byContentType));
    }
}

View File

@@ -3,6 +3,7 @@ package com.bruce.common.enumconfig;
import com.bruce.common.enums.CommonStatusEnum;
import com.bruce.common.enums.EnableStatusEnum;
import com.bruce.rag.enums.RagIndexStatusEnum;
import com.bruce.rag.enums.RagChunkStrategyEnum;
import com.bruce.rag.enums.RagParseStatusEnum;
import org.junit.jupiter.api.Test;
@@ -24,6 +25,9 @@ class EnumDefinitionTests {
assertEquals(4, RagParseStatusEnum.FAILED.getValue());
assertEquals(1, RagIndexStatusEnum.PENDING.getValue());
assertEquals(3, RagIndexStatusEnum.INDEXED.getValue());
assertEquals(1, RagChunkStrategyEnum.FIXED_LENGTH.getValue());
assertEquals(5, RagChunkStrategyEnum.DELIMITER.getValue());
assertEquals(6, RagChunkStrategyEnum.SEMANTIC.getValue());
}
@Test
@@ -38,5 +42,8 @@ class EnumDefinitionTests {
assertEquals("解析失败", RagParseStatusEnum.FAILED.getLabel());
assertEquals("待索引", RagIndexStatusEnum.PENDING.getLabel());
assertEquals("已索引", RagIndexStatusEnum.INDEXED.getLabel());
assertEquals("固定长度切片", RagChunkStrategyEnum.FIXED_LENGTH.getLabel());
assertEquals("按分隔符切片", RagChunkStrategyEnum.DELIMITER.getLabel());
assertEquals("语义切片", RagChunkStrategyEnum.SEMANTIC.getLabel());
}
}

View File

@@ -5,6 +5,7 @@ import com.bruce.common.domain.entity.SysEnum;
import com.bruce.common.enums.CommonStatusEnum;
import com.bruce.common.enums.EnableStatusEnum;
import com.bruce.common.service.ISysEnumService;
import com.bruce.rag.enums.RagChunkStrategyEnum;
import com.bruce.rag.enums.RagIndexStatusEnum;
import com.bruce.rag.enums.RagParseStatusEnum;
import org.junit.jupiter.api.Test;
@@ -40,6 +41,13 @@ class SysEnumDataInitTests {
saveOrUpdate("rag", "index_status", RagIndexStatusEnum.INDEXING.getLabel(), RagIndexStatusEnum.INDEXING.getValue(), 2, "RAG文档索引状态");
saveOrUpdate("rag", "index_status", RagIndexStatusEnum.INDEXED.getLabel(), RagIndexStatusEnum.INDEXED.getValue(), 3, "RAG文档索引状态");
saveOrUpdate("rag", "index_status", RagIndexStatusEnum.FAILED.getLabel(), RagIndexStatusEnum.FAILED.getValue(), 4, "RAG文档索引状态");
saveOrUpdate("rag", "chunk_strategy", RagChunkStrategyEnum.FIXED_LENGTH.getLabel(), RagChunkStrategyEnum.FIXED_LENGTH.getValue(), 1, "RAG文档切片方式");
saveOrUpdate("rag", "chunk_strategy", RagChunkStrategyEnum.PARAGRAPH.getLabel(), RagChunkStrategyEnum.PARAGRAPH.getValue(), 2, "RAG文档切片方式");
saveOrUpdate("rag", "chunk_strategy", RagChunkStrategyEnum.HEADING.getLabel(), RagChunkStrategyEnum.HEADING.getValue(), 3, "RAG文档切片方式");
saveOrUpdate("rag", "chunk_strategy", RagChunkStrategyEnum.TABLE_ROW.getLabel(), RagChunkStrategyEnum.TABLE_ROW.getValue(), 4, "RAG文档切片方式");
saveOrUpdate("rag", "chunk_strategy", RagChunkStrategyEnum.DELIMITER.getLabel(), RagChunkStrategyEnum.DELIMITER.getValue(), 5, "RAG文档切片方式");
saveOrUpdate("rag", "chunk_strategy", RagChunkStrategyEnum.SEMANTIC.getLabel(), RagChunkStrategyEnum.SEMANTIC.getValue(), 6, "RAG文档切片方式");
}
private void saveOrUpdate(String catalog, String type, String name, Integer value, Integer sort, String remark) {

View File

@@ -8,18 +8,29 @@ import com.bruce.rag.constant.RagSystemConstants;
import com.bruce.rag.controller.RagDocumentController;
import com.bruce.rag.controller.RagStoreController;
import com.bruce.rag.dto.request.RagDocumentQueryRequest;
import com.bruce.rag.dto.request.RagDocumentParseRequest;
import com.bruce.rag.dto.request.RagStoreQueryRequest;
import com.bruce.rag.dto.request.RagStoreSaveRequest;
import com.bruce.rag.dto.response.RagDocumentParseResponse;
import com.bruce.rag.dto.response.RagStoreDocumentOverviewResponse;
import com.bruce.rag.dto.response.RagStoreOverviewResponse;
import com.bruce.rag.dto.response.RagDocumentResponse;
import com.bruce.rag.dto.response.RagStoreResponse;
import com.bruce.rag.entity.RagChunk;
import com.bruce.rag.entity.RagChunkEmbedding;
import com.bruce.rag.entity.RagDocument;
import com.bruce.rag.entity.RagStore;
import com.bruce.rag.mapper.RagChunkEmbeddingMapper;
import com.bruce.rag.mapper.RagChunkMapper;
import com.bruce.rag.mapper.RagDocumentMapper;
import com.bruce.rag.mapper.RagStoreMapper;
import com.bruce.rag.service.IRagChunkEmbeddingService;
import com.bruce.rag.service.IRagChunkService;
import com.bruce.rag.service.IRagDocumentParseService;
import com.bruce.rag.service.IRagDocumentService;
import com.bruce.rag.service.IRagStoreService;
import com.bruce.rag.service.impl.RagChunkEmbeddingServiceImpl;
import com.bruce.rag.service.impl.RagChunkServiceImpl;
import com.bruce.rag.service.impl.RagDocumentServiceImpl;
import com.bruce.rag.service.impl.RagStoreServiceImpl;
import org.junit.jupiter.api.Test;
@@ -39,10 +50,16 @@ class RagComponentStructureTests {
void ragComponentsShouldReuseMybatisPlusBaseTypes() {
assertTrue(BaseMapper.class.isAssignableFrom(RagStoreMapper.class));
assertTrue(BaseMapper.class.isAssignableFrom(RagDocumentMapper.class));
assertTrue(BaseMapper.class.isAssignableFrom(RagChunkMapper.class));
assertTrue(BaseMapper.class.isAssignableFrom(RagChunkEmbeddingMapper.class));
assertTrue(IService.class.isAssignableFrom(IRagStoreService.class));
assertTrue(IService.class.isAssignableFrom(IRagDocumentService.class));
assertTrue(IService.class.isAssignableFrom(IRagChunkService.class));
assertTrue(IService.class.isAssignableFrom(IRagChunkEmbeddingService.class));
assertTrue(ServiceImpl.class.isAssignableFrom(RagStoreServiceImpl.class));
assertTrue(ServiceImpl.class.isAssignableFrom(RagDocumentServiceImpl.class));
assertTrue(ServiceImpl.class.isAssignableFrom(RagChunkServiceImpl.class));
assertTrue(ServiceImpl.class.isAssignableFrom(RagChunkEmbeddingServiceImpl.class));
}
@Test
@@ -63,8 +80,10 @@ class RagComponentStructureTests {
Method documentListMethod = RagDocumentController.class.getMethod("list");
Method documentQueryMethod = RagDocumentController.class.getMethod("query", RagDocumentQueryRequest.class);
Method documentParseMethod = RagDocumentController.class.getMethod("parse", RagDocumentParseRequest.class);
Method documentResponseListMethod = IRagDocumentService.class.getMethod("listResponses");
Method documentServiceQueryMethod = IRagDocumentService.class.getMethod("query", RagDocumentQueryRequest.class);
Method documentParseServiceMethod = IRagDocumentParseService.class.getMethod("parse", RagDocumentParseRequest.class);
assertEquals(RequestResult.class, storeListMethod.getReturnType());
assertEquals(RequestResult.class, storeQueryMethod.getReturnType());
@@ -89,11 +108,14 @@ class RagComponentStructureTests {
assertEquals(RequestResult.class, documentListMethod.getReturnType());
assertEquals(RequestResult.class, documentQueryMethod.getReturnType());
assertEquals(RequestResult.class, documentParseMethod.getReturnType());
assertEquals(List.class, documentServiceQueryMethod.getReturnType());
assertEquals(List.class, documentParseServiceMethod.getReturnType());
assertTrue(documentResponseListMethod.getGenericReturnType().getTypeName().contains("RagDocumentResponse"));
assertTrue(documentServiceQueryMethod.getGenericReturnType().getTypeName().contains("RagDocumentResponse"));
assertTrue(documentListMethod.getGenericReturnType().getTypeName().contains("RagDocumentResponse"));
assertTrue(documentQueryMethod.getGenericReturnType().getTypeName().contains("RagDocumentResponse"));
assertTrue(documentParseMethod.getGenericReturnType().getTypeName().contains("RagDocumentParseResponse"));
assertEquals(RagDocumentResponse.class, RagDocumentResponse.class.getMethod("fromEntity", RagDocument.class).getReturnType());
}
@@ -121,4 +143,34 @@ class RagComponentStructureTests {
assertTrue(RagStoreController.class.getSimpleName().contains("RagStoreController"));
assertTrue(RagDocumentController.class.getSimpleName().contains("RagDocumentController"));
}
/** Pins the declared Java type of every field on {@link RagChunk}. */
@Test
void ragChunkStructureShouldSupportChunkMetadata() throws NoSuchFieldException {
    final Class<RagChunk> chunkType = RagChunk.class;

    // Ownership and position within the document.
    assertEquals(Long.class, chunkType.getDeclaredField("storeId").getType());
    assertEquals(Long.class, chunkType.getDeclaredField("documentId").getType());
    assertEquals(Integer.class, chunkType.getDeclaredField("chunkIndex").getType());

    // Content and size.
    assertEquals(String.class, chunkType.getDeclaredField("chunkContent").getType());
    assertEquals(String.class, chunkType.getDeclaredField("chunkSummary").getType());
    assertEquals(Integer.class, chunkType.getDeclaredField("tokenCount").getType());

    // Location inside the source document.
    assertEquals(Integer.class, chunkType.getDeclaredField("pageNumber").getType());
    assertEquals(String.class, chunkType.getDeclaredField("sectionTitle").getType());
    assertEquals(String.class, chunkType.getDeclaredField("headingPath").getType());

    // Vector link, extension metadata and housekeeping fields.
    assertEquals(String.class, chunkType.getDeclaredField("vectorId").getType());
    assertEquals(String.class, chunkType.getDeclaredField("metadataJson").getType());
    assertEquals(Boolean.class, chunkType.getDeclaredField("enabled").getType());
    assertEquals(String.class, chunkType.getDeclaredField("remark").getType());
}
/** Pins the declared Java type of every field on {@link RagChunkEmbedding}. */
@Test
void ragChunkEmbeddingStructureShouldSupportPgvectorMetadata() throws NoSuchFieldException {
    final Class<RagChunkEmbedding> embeddingType = RagChunkEmbedding.class;

    // References to the owning store, document and chunk.
    assertEquals(Long.class, embeddingType.getDeclaredField("storeId").getType());
    assertEquals(Long.class, embeddingType.getDeclaredField("documentId").getType());
    assertEquals(Long.class, embeddingType.getDeclaredField("chunkId").getType());

    // Model description and the vector itself.
    assertEquals(String.class, embeddingType.getDeclaredField("embeddingModel").getType());
    assertEquals(Integer.class, embeddingType.getDeclaredField("embeddingDimension").getType());
    assertEquals(String.class, embeddingType.getDeclaredField("embedding").getType());
    assertEquals(String.class, embeddingType.getDeclaredField("contentHash").getType());

    // Housekeeping fields.
    assertEquals(Boolean.class, embeddingType.getDeclaredField("enabled").getType());
    assertEquals(String.class, embeddingType.getDeclaredField("remark").getType());
}
}

View File

@@ -0,0 +1,160 @@
package com.bruce.rag;
import com.bruce.common.config.AttachmentProperties;
import com.bruce.common.document.parse.DocumentParseContext;
import com.bruce.common.document.parse.DocumentParseResult;
import com.bruce.common.document.parse.DocumentParser;
import com.bruce.common.document.parse.DocumentParserFactory;
import com.bruce.common.domain.entity.SysAttachment;
import com.bruce.common.service.ISysAttachmentService;
import com.bruce.rag.dto.request.RagDocumentParseRequest;
import com.bruce.rag.dto.response.RagDocumentParseResponse;
import com.bruce.rag.entity.RagDocument;
import com.bruce.rag.enums.RagParseStatusEnum;
import com.bruce.rag.service.IRagDocumentService;
import com.bruce.rag.service.impl.RagDocumentParseServiceImpl;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.when;
/**
 * Unit tests for {@link RagDocumentParseServiceImpl} using mocked document/attachment
 * services, a temp-directory attachment store and a stub parser.
 */
@ExtendWith(MockitoExtension.class)
class RagDocumentParseServiceImplTests {

    @TempDir
    private Path tempDir;

    @Mock
    private IRagDocumentService ragDocumentService;

    @Mock
    private ISysAttachmentService sysAttachmentService;

    /** A single parse moves the status PARSING -> PARSED and returns the parser's result. */
    @Test
    void parseShouldUpdateStatusAndReturnParseResponse() throws Exception {
        writeRagFile("people.txt", "people profiles");
        DocumentParser parser = new FixedDocumentParser("people profiles");
        RagDocumentParseServiceImpl service = newService(parser);
        stubLookups(uploadedDocument(1001L, 3003L), textAttachment(3003L, "people.txt"));

        RagDocumentParseResponse response = service.parse(1001L);

        assertEquals(1001L, response.getDocumentId());
        assertEquals(RagParseStatusEnum.PARSED.name(), response.getParseStatus());
        assertEquals(15, response.getTextLength());
        assertEquals("fixed", response.getMetadata().get("parser"));

        ArgumentCaptor<RagDocument> statusUpdates = ArgumentCaptor.forClass(RagDocument.class);
        verify(ragDocumentService, times(2)).updateById(statusUpdates.capture());
        List<RagDocument> recorded = statusUpdates.getAllValues();
        assertEquals(RagParseStatusEnum.PARSING.name(), recorded.get(0).getParseStatus());
        assertEquals(RagParseStatusEnum.PARSED.name(), recorded.get(1).getParseStatus());
        assertTrue(parser.supports(new DocumentParseContext()));
    }

    /** A batch request with a valid chunk strategy yields one PARSED response per document. */
    @Test
    void parseShouldSupportBatchRequestAndChunkStrategyStructure() throws Exception {
        writeRagFile("batch.txt", "batch profiles");
        RagDocumentParseServiceImpl service = newService(new FixedDocumentParser("batch profiles"));

        RagDocumentParseRequest request = new RagDocumentParseRequest();
        request.setDocumentIds(List.of(1002L));
        request.setChunkStrategy("DELIMITER");
        request.setDelimiter("");
        stubLookups(uploadedDocument(1002L, 3004L), textAttachment(3004L, "batch.txt"));

        List<RagDocumentParseResponse> responses = service.parse(request);

        assertEquals(1, responses.size());
        assertEquals(1002L, responses.getFirst().getDocumentId());
        assertEquals(RagParseStatusEnum.PARSED.name(), responses.getFirst().getParseStatus());
    }

    /** Creates {@code <tempDir>/rag/<fileName>} with the given content. */
    private void writeRagFile(String fileName, String content) throws Exception {
        Path target = tempDir.resolve("rag").resolve(fileName);
        Files.createDirectories(target.getParent());
        Files.writeString(target, content);
    }

    /** Builds a document in store 2002 with status UPLOADED. */
    private RagDocument uploadedDocument(Long documentId, Long attachmentId) {
        RagDocument document = new RagDocument();
        document.setId(documentId);
        document.setStoreId(2002L);
        document.setAttachmentId(attachmentId);
        document.setParseStatus(RagParseStatusEnum.UPLOADED.name());
        return document;
    }

    /** Builds a text/plain attachment whose relative path is {@code rag/<fileName>}. */
    private SysAttachment textAttachment(Long attachmentId, String fileName) {
        SysAttachment attachment = new SysAttachment();
        attachment.setId(attachmentId);
        attachment.setOriginalName(fileName);
        attachment.setFileSuffix("txt");
        attachment.setContentType("text/plain");
        attachment.setFilePath("rag/" + fileName);
        return attachment;
    }

    /** Wires the service with the mocks, the temp dir as base path and a single parser. */
    private RagDocumentParseServiceImpl newService(DocumentParser parser) {
        AttachmentProperties attachmentProperties = new AttachmentProperties();
        attachmentProperties.setBasePath(tempDir.toString());
        return new RagDocumentParseServiceImpl(
                ragDocumentService,
                sysAttachmentService,
                attachmentProperties,
                new DocumentParserFactory(List.of(parser))
        );
    }

    /** Stubs the two lookups and makes every status update report success. */
    private void stubLookups(RagDocument document, SysAttachment attachment) {
        when(ragDocumentService.getById(document.getId())).thenReturn(document);
        when(sysAttachmentService.getById(attachment.getId())).thenReturn(attachment);
        when(ragDocumentService.updateById(any(RagDocument.class))).thenReturn(true);
    }

    /** Parser stub that accepts every context and always returns the same text. */
    private static class FixedDocumentParser implements DocumentParser {

        private final String text;

        private FixedDocumentParser(String text) {
            this.text = text;
        }

        @Override
        public boolean supports(DocumentParseContext context) {
            return true;
        }

        @Override
        public DocumentParseResult parse(DocumentParseContext context) {
            DocumentParseResult fixedResult = new DocumentParseResult();
            fixedResult.setText(text);
            fixedResult.setTextLength(text.length());
            fixedResult.setMetadata(Map.of("parser", "fixed"));
            return fixedResult;
        }
    }
}

View File

@@ -102,13 +102,13 @@ class RagDocumentServiceImplTests {
request.setRemark(" 备注信息 ");
doReturn(existingDocument).when(ragDocumentService).getById(3003L);
doReturn(true).when(ragDocumentService).saveOrUpdate(any(RagDocument.class));
doReturn(true).when(ragDocumentService).updateById(any(RagDocument.class));
boolean result = ragDocumentService.saveOrUpdate(request);
assertTrue(result);
ArgumentCaptor<RagDocument> documentCaptor = ArgumentCaptor.forClass(RagDocument.class);
verify(ragDocumentService).saveOrUpdate(documentCaptor.capture());
verify(ragDocumentService).updateById(documentCaptor.capture());
RagDocument savedDocument = documentCaptor.getValue();
assertEquals(3003L, savedDocument.getId());
assertEquals(1001L, savedDocument.getStoreId());
@@ -121,4 +121,40 @@ class RagDocumentServiceImplTests {
assertEquals("已修复", savedDocument.getErrorMessage());
assertEquals("备注信息", savedDocument.getRemark());
}
/**
 * A save request that carries only id, store id, title and the enabled flag must
 * leave the stored attachment id, summary, parse/index status and remark intact,
 * while applying the new enabled value.
 */
@Test
void saveOrUpdateShouldPreserveExistingFieldsForPartialUpdate() {
    // Arrange: the document as it is returned by getById before the update.
    RagDocument stored = new RagDocument();
    stored.setId(3003L);
    stored.setStoreId(1001L);
    stored.setAttachmentId(2002L);
    stored.setDocumentTitle("people_profiles.txt");
    stored.setDocumentSummary("测试人员信息,有多条人员信息");
    stored.setParseStatus(RagParseStatusEnum.UPLOADED.name());
    stored.setIndexStatus(RagIndexStatusEnum.PENDING.name());
    stored.setEnabled(true);
    stored.setRemark("测试人员信息");

    // Arrange: a partial request; summary, statuses, attachment and remark are not set.
    RagDocumentSaveRequest partialRequest = new RagDocumentSaveRequest();
    partialRequest.setId(3003L);
    partialRequest.setStoreId(1001L);
    partialRequest.setDocumentTitle("people_profiles.txt");
    partialRequest.setEnabled(false);

    // NOTE(review): doReturn(..).when(..) stubbing suggests ragDocumentService is a
    // Mockito spy — confirm against the class-level setup.
    doReturn(stored).when(ragDocumentService).getById(3003L);
    doReturn(true).when(ragDocumentService).updateById(any(RagDocument.class));

    // Act
    assertTrue(ragDocumentService.saveOrUpdate(partialRequest));

    // Assert: inspect the entity that was handed to updateById.
    ArgumentCaptor<RagDocument> captor = ArgumentCaptor.forClass(RagDocument.class);
    verify(ragDocumentService).updateById(captor.capture());
    RagDocument persisted = captor.getValue();

    // Fields absent from the request keep their stored values.
    assertEquals(2002L, persisted.getAttachmentId());
    assertEquals("测试人员信息,有多条人员信息", persisted.getDocumentSummary());
    assertEquals(RagParseStatusEnum.UPLOADED.name(), persisted.getParseStatus());
    assertEquals(RagIndexStatusEnum.PENDING.name(), persisted.getIndexStatus());
    // The explicitly supplied flag overrides the stored one.
    assertEquals(false, persisted.getEnabled());
    assertEquals("测试人员信息", persisted.getRemark());
}
}