feat(rag): 落解析快照并打通自动解析链路

This commit is contained in:
2026-05-24 23:17:12 +08:00
parent e51903efbe
commit d8079d6277
16 changed files with 427 additions and 28 deletions

View File

@@ -3,6 +3,8 @@ package com.bruce.rag;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.baomidou.mybatisplus.extension.service.IService;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableName;
import com.bruce.common.domain.model.RequestResult;
import com.bruce.rag.constant.RagSystemConstants;
import com.bruce.rag.controller.RagDocumentController;
@@ -19,20 +21,25 @@ import com.bruce.rag.dto.response.RagStoreResponse;
import com.bruce.rag.entity.RagChunk;
import com.bruce.rag.entity.RagChunkEmbedding;
import com.bruce.rag.entity.RagDocument;
import com.bruce.rag.entity.RagDocumentParseResult;
import com.bruce.rag.entity.RagStore;
import com.bruce.rag.mapper.RagChunkEmbeddingMapper;
import com.bruce.rag.mapper.RagChunkMapper;
import com.bruce.rag.mapper.RagDocumentMapper;
import com.bruce.rag.mapper.RagDocumentParseResultMapper;
import com.bruce.rag.mapper.RagStoreMapper;
import com.bruce.rag.service.IRagChunkEmbeddingService;
import com.bruce.rag.service.IRagChunkService;
import com.bruce.rag.service.IRagDocumentParseService;
import com.bruce.rag.service.IRagDocumentParseResultService;
import com.bruce.rag.service.IRagDocumentService;
import com.bruce.rag.service.IRagStoreService;
import com.bruce.rag.service.impl.RagChunkEmbeddingServiceImpl;
import com.bruce.rag.service.impl.RagChunkServiceImpl;
import com.bruce.rag.service.impl.RagDocumentParseResultServiceImpl;
import com.bruce.rag.service.impl.RagDocumentServiceImpl;
import com.bruce.rag.service.impl.RagStoreServiceImpl;
import com.bruce.rag.typehandler.PgJsonbStringTypeHandler;
import org.junit.jupiter.api.Test;
import org.springframework.web.bind.annotation.PostMapping;
@@ -50,14 +57,17 @@ class RagComponentStructureTests {
void ragComponentsShouldReuseMybatisPlusBaseTypes() {
    // Structural check: each RAG mapper, service interface and service implementation
    // must be assignable to the corresponding MyBatis-Plus base type.

    // Mapper interfaces are checked against BaseMapper.
    Class<?>[] mapperTypes = {
        RagStoreMapper.class,
        RagDocumentMapper.class,
        RagDocumentParseResultMapper.class,
        RagChunkMapper.class,
        RagChunkEmbeddingMapper.class
    };
    for (Class<?> mapperType : mapperTypes) {
        assertTrue(BaseMapper.class.isAssignableFrom(mapperType));
    }

    // Service interfaces are checked against IService.
    Class<?>[] serviceTypes = {
        IRagStoreService.class,
        IRagDocumentService.class,
        IRagDocumentParseResultService.class,
        IRagChunkService.class,
        IRagChunkEmbeddingService.class
    };
    for (Class<?> serviceType : serviceTypes) {
        assertTrue(IService.class.isAssignableFrom(serviceType));
    }

    // Service implementations are checked against ServiceImpl.
    Class<?>[] serviceImplTypes = {
        RagStoreServiceImpl.class,
        RagDocumentServiceImpl.class,
        RagDocumentParseResultServiceImpl.class,
        RagChunkServiceImpl.class,
        RagChunkEmbeddingServiceImpl.class
    };
    for (Class<?> serviceImplType : serviceImplTypes) {
        assertTrue(ServiceImpl.class.isAssignableFrom(serviceImplType));
    }
}
@@ -173,4 +183,35 @@ class RagComponentStructureTests {
assertEquals(Boolean.class, RagChunkEmbedding.class.getDeclaredField("enabled").getType());
assertEquals(String.class, RagChunkEmbedding.class.getDeclaredField("remark").getType());
}
/**
 * Verifies by reflection that {@link RagDocumentParseResult} declares each of the
 * fields checked below with the expected declared type.
 *
 * @throws NoSuchFieldException if any of the expected fields is not declared on the entity
 */
@Test
void ragParseResultStructureShouldSupportSnapshotMetadata() throws NoSuchFieldException {
    Class<RagDocumentParseResult> entity = RagDocumentParseResult.class;

    // Identifier fields.
    assertEquals(Long.class, entity.getDeclaredField("storeId").getType());
    assertEquals(Long.class, entity.getDeclaredField("documentId").getType());

    // Parsed content and its size counters.
    assertEquals(String.class, entity.getDeclaredField("parsedText").getType());
    assertEquals(Integer.class, entity.getDeclaredField("textLength").getType());
    assertEquals(Integer.class, entity.getDeclaredField("pageCount").getType());
    assertEquals(Integer.class, entity.getDeclaredField("sheetCount").getType());

    // Metadata, hash, version and enabled flag.
    assertEquals(String.class, entity.getDeclaredField("metadataJson").getType());
    assertEquals(String.class, entity.getDeclaredField("contentHash").getType());
    assertEquals(Integer.class, entity.getDeclaredField("parseVersion").getType());
    assertEquals(Boolean.class, entity.getDeclaredField("enabled").getType());
}
/**
 * Verifies that {@link RagChunk} and {@link RagDocumentParseResult} both carry a
 * {@code @TableName} annotation with {@code autoResultMap} enabled, and that their
 * {@code metadataJson} fields carry a {@code @TableField} annotation whose type handler
 * is {@link PgJsonbStringTypeHandler}.
 *
 * @throws NoSuchFieldException if either entity does not declare a {@code metadataJson} field
 */
@Test
void ragMetadataJsonFieldsShouldUseJsonbTypeHandler() throws NoSuchFieldException {
    final String metadataFieldName = "metadataJson";

    // Resolve all four annotations up front, in the same order as before.
    TableName chunkTableAnnotation = RagChunk.class.getAnnotation(TableName.class);
    TableName snapshotTableAnnotation = RagDocumentParseResult.class.getAnnotation(TableName.class);
    TableField chunkFieldAnnotation =
        RagChunk.class.getDeclaredField(metadataFieldName).getAnnotation(TableField.class);
    TableField snapshotFieldAnnotation =
        RagDocumentParseResult.class.getDeclaredField(metadataFieldName).getAnnotation(TableField.class);

    // Table-level expectations.
    assertNotNull(chunkTableAnnotation);
    assertNotNull(snapshotTableAnnotation);
    assertTrue(chunkTableAnnotation.autoResultMap());
    assertTrue(snapshotTableAnnotation.autoResultMap());

    // Field-level expectations.
    assertNotNull(chunkFieldAnnotation);
    assertNotNull(snapshotFieldAnnotation);
    assertEquals(PgJsonbStringTypeHandler.class, chunkFieldAnnotation.typeHandler());
    assertEquals(PgJsonbStringTypeHandler.class, snapshotFieldAnnotation.typeHandler());
}
}

View File

@@ -10,8 +10,9 @@ import com.bruce.common.service.ISysAttachmentService;
import com.bruce.rag.dto.request.RagDocumentParseRequest;
import com.bruce.rag.dto.response.RagDocumentParseResponse;
import com.bruce.rag.entity.RagDocument;
import com.bruce.rag.mapper.RagDocumentMapper;
import com.bruce.rag.service.IRagDocumentParseResultService;
import com.bruce.rag.enums.RagParseStatusEnum;
import com.bruce.rag.service.IRagDocumentService;
import com.bruce.rag.service.impl.RagDocumentParseServiceImpl;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
@@ -32,6 +33,7 @@ import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.when;
import static org.mockito.Mockito.mock;
@ExtendWith(MockitoExtension.class)
class RagDocumentParseServiceImplTests {
@@ -40,7 +42,7 @@ class RagDocumentParseServiceImplTests {
private Path tempDir;
@Mock
private IRagDocumentService ragDocumentService;
private RagDocumentMapper ragDocumentMapper;
@Mock
private ISysAttachmentService sysAttachmentService;
@@ -68,15 +70,16 @@ class RagDocumentParseServiceImplTests {
attachmentProperties.setBasePath(tempDir.toString());
DocumentParser parser = new FixedDocumentParser("people profiles");
RagDocumentParseServiceImpl service = new RagDocumentParseServiceImpl(
ragDocumentService,
ragDocumentMapper,
sysAttachmentService,
attachmentProperties,
new DocumentParserFactory(List.of(parser))
new DocumentParserFactory(List.of(parser)),
mock(IRagDocumentParseResultService.class)
);
when(ragDocumentService.getById(1001L)).thenReturn(document);
when(ragDocumentMapper.selectById(1001L)).thenReturn(document);
when(sysAttachmentService.getById(3003L)).thenReturn(attachment);
when(ragDocumentService.updateById(any(RagDocument.class))).thenReturn(true);
when(ragDocumentMapper.updateById(any(RagDocument.class))).thenReturn(1);
RagDocumentParseResponse response = service.parse(1001L);
@@ -86,7 +89,7 @@ class RagDocumentParseServiceImplTests {
assertEquals("fixed", response.getMetadata().get("parser"));
ArgumentCaptor<RagDocument> captor = ArgumentCaptor.forClass(RagDocument.class);
verify(ragDocumentService, times(2)).updateById(captor.capture());
verify(ragDocumentMapper, times(2)).updateById(captor.capture());
List<RagDocument> updates = captor.getAllValues();
assertEquals(RagParseStatusEnum.PARSING.name(), updates.get(0).getParseStatus());
assertEquals(RagParseStatusEnum.PARSED.name(), updates.get(1).getParseStatus());
@@ -115,17 +118,18 @@ class RagDocumentParseServiceImplTests {
AttachmentProperties attachmentProperties = new AttachmentProperties();
attachmentProperties.setBasePath(tempDir.toString());
RagDocumentParseServiceImpl service = new RagDocumentParseServiceImpl(
ragDocumentService,
ragDocumentMapper,
sysAttachmentService,
attachmentProperties,
new DocumentParserFactory(List.of(new FixedDocumentParser("batch profiles")))
new DocumentParserFactory(List.of(new FixedDocumentParser("batch profiles"))),
mock(IRagDocumentParseResultService.class)
);
RagDocumentParseRequest request = new RagDocumentParseRequest();
request.setDocumentIds(List.of(1002L));
when(ragDocumentService.getById(1002L)).thenReturn(document);
when(ragDocumentMapper.selectById(1002L)).thenReturn(document);
when(sysAttachmentService.getById(3004L)).thenReturn(attachment);
when(ragDocumentService.updateById(any(RagDocument.class))).thenReturn(true);
when(ragDocumentMapper.updateById(any(RagDocument.class))).thenReturn(1);
List<RagDocumentParseResponse> responses = service.parse(request);
@@ -139,10 +143,11 @@ class RagDocumentParseServiceImplTests {
AttachmentProperties attachmentProperties = new AttachmentProperties();
attachmentProperties.setBasePath(tempDir.toString());
RagDocumentParseServiceImpl service = new RagDocumentParseServiceImpl(
ragDocumentService,
ragDocumentMapper,
sysAttachmentService,
attachmentProperties,
new DocumentParserFactory(List.of(new FixedDocumentParser("batch profiles")))
new DocumentParserFactory(List.of(new FixedDocumentParser("batch profiles"))),
mock(IRagDocumentParseResultService.class)
);
RagDocumentParseRequest request = new RagDocumentParseRequest();
request.setDocumentIds(List.of());

View File

@@ -9,6 +9,7 @@ import com.bruce.rag.dto.request.RagDocumentSaveRequest;
import com.bruce.rag.entity.RagDocument;
import com.bruce.rag.enums.RagIndexStatusEnum;
import com.bruce.rag.enums.RagParseStatusEnum;
import com.bruce.rag.service.IRagDocumentAutoParseService;
import com.bruce.rag.service.impl.RagDocumentServiceImpl;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
@@ -18,7 +19,6 @@ import org.mockito.Mock;
import org.mockito.Spy;
import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.context.ApplicationEventPublisher;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
@@ -40,7 +40,7 @@ class RagDocumentServiceImplTests {
private ISysAttachmentService sysAttachmentService;
@Mock
private ApplicationEventPublisher eventPublisher;
private IRagDocumentAutoParseService ragDocumentAutoParseService;
@Test
void batchUploadShouldUseRagSourceTypeAndStoreIdAsSourceId() {