feat(rag): 落解析快照并打通自动解析链路
This commit is contained in:
@@ -3,6 +3,8 @@ package com.bruce.rag;
|
||||
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
|
||||
import com.baomidou.mybatisplus.extension.service.IService;
|
||||
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
|
||||
import com.baomidou.mybatisplus.annotation.TableField;
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import com.bruce.common.domain.model.RequestResult;
|
||||
import com.bruce.rag.constant.RagSystemConstants;
|
||||
import com.bruce.rag.controller.RagDocumentController;
|
||||
@@ -19,20 +21,25 @@ import com.bruce.rag.dto.response.RagStoreResponse;
|
||||
import com.bruce.rag.entity.RagChunk;
|
||||
import com.bruce.rag.entity.RagChunkEmbedding;
|
||||
import com.bruce.rag.entity.RagDocument;
|
||||
import com.bruce.rag.entity.RagDocumentParseResult;
|
||||
import com.bruce.rag.entity.RagStore;
|
||||
import com.bruce.rag.mapper.RagChunkEmbeddingMapper;
|
||||
import com.bruce.rag.mapper.RagChunkMapper;
|
||||
import com.bruce.rag.mapper.RagDocumentMapper;
|
||||
import com.bruce.rag.mapper.RagDocumentParseResultMapper;
|
||||
import com.bruce.rag.mapper.RagStoreMapper;
|
||||
import com.bruce.rag.service.IRagChunkEmbeddingService;
|
||||
import com.bruce.rag.service.IRagChunkService;
|
||||
import com.bruce.rag.service.IRagDocumentParseService;
|
||||
import com.bruce.rag.service.IRagDocumentParseResultService;
|
||||
import com.bruce.rag.service.IRagDocumentService;
|
||||
import com.bruce.rag.service.IRagStoreService;
|
||||
import com.bruce.rag.service.impl.RagChunkEmbeddingServiceImpl;
|
||||
import com.bruce.rag.service.impl.RagChunkServiceImpl;
|
||||
import com.bruce.rag.service.impl.RagDocumentParseResultServiceImpl;
|
||||
import com.bruce.rag.service.impl.RagDocumentServiceImpl;
|
||||
import com.bruce.rag.service.impl.RagStoreServiceImpl;
|
||||
import com.bruce.rag.typehandler.PgJsonbStringTypeHandler;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
|
||||
@@ -50,14 +57,17 @@ class RagComponentStructureTests {
|
||||
void ragComponentsShouldReuseMybatisPlusBaseTypes() {
|
||||
assertTrue(BaseMapper.class.isAssignableFrom(RagStoreMapper.class));
|
||||
assertTrue(BaseMapper.class.isAssignableFrom(RagDocumentMapper.class));
|
||||
assertTrue(BaseMapper.class.isAssignableFrom(RagDocumentParseResultMapper.class));
|
||||
assertTrue(BaseMapper.class.isAssignableFrom(RagChunkMapper.class));
|
||||
assertTrue(BaseMapper.class.isAssignableFrom(RagChunkEmbeddingMapper.class));
|
||||
assertTrue(IService.class.isAssignableFrom(IRagStoreService.class));
|
||||
assertTrue(IService.class.isAssignableFrom(IRagDocumentService.class));
|
||||
assertTrue(IService.class.isAssignableFrom(IRagDocumentParseResultService.class));
|
||||
assertTrue(IService.class.isAssignableFrom(IRagChunkService.class));
|
||||
assertTrue(IService.class.isAssignableFrom(IRagChunkEmbeddingService.class));
|
||||
assertTrue(ServiceImpl.class.isAssignableFrom(RagStoreServiceImpl.class));
|
||||
assertTrue(ServiceImpl.class.isAssignableFrom(RagDocumentServiceImpl.class));
|
||||
assertTrue(ServiceImpl.class.isAssignableFrom(RagDocumentParseResultServiceImpl.class));
|
||||
assertTrue(ServiceImpl.class.isAssignableFrom(RagChunkServiceImpl.class));
|
||||
assertTrue(ServiceImpl.class.isAssignableFrom(RagChunkEmbeddingServiceImpl.class));
|
||||
}
|
||||
@@ -173,4 +183,35 @@ class RagComponentStructureTests {
|
||||
assertEquals(Boolean.class, RagChunkEmbedding.class.getDeclaredField("enabled").getType());
|
||||
assertEquals(String.class, RagChunkEmbedding.class.getDeclaredField("remark").getType());
|
||||
}
|
||||
|
||||
@Test
|
||||
void ragParseResultStructureShouldSupportSnapshotMetadata() throws NoSuchFieldException {
|
||||
assertEquals(Long.class, RagDocumentParseResult.class.getDeclaredField("storeId").getType());
|
||||
assertEquals(Long.class, RagDocumentParseResult.class.getDeclaredField("documentId").getType());
|
||||
assertEquals(String.class, RagDocumentParseResult.class.getDeclaredField("parsedText").getType());
|
||||
assertEquals(Integer.class, RagDocumentParseResult.class.getDeclaredField("textLength").getType());
|
||||
assertEquals(Integer.class, RagDocumentParseResult.class.getDeclaredField("pageCount").getType());
|
||||
assertEquals(Integer.class, RagDocumentParseResult.class.getDeclaredField("sheetCount").getType());
|
||||
assertEquals(String.class, RagDocumentParseResult.class.getDeclaredField("metadataJson").getType());
|
||||
assertEquals(String.class, RagDocumentParseResult.class.getDeclaredField("contentHash").getType());
|
||||
assertEquals(Integer.class, RagDocumentParseResult.class.getDeclaredField("parseVersion").getType());
|
||||
assertEquals(Boolean.class, RagDocumentParseResult.class.getDeclaredField("enabled").getType());
|
||||
}
|
||||
|
||||
@Test
|
||||
void ragMetadataJsonFieldsShouldUseJsonbTypeHandler() throws NoSuchFieldException {
|
||||
TableName chunkTable = RagChunk.class.getAnnotation(TableName.class);
|
||||
TableName parseResultTable = RagDocumentParseResult.class.getAnnotation(TableName.class);
|
||||
TableField chunkMetadataField = RagChunk.class.getDeclaredField("metadataJson").getAnnotation(TableField.class);
|
||||
TableField parseResultMetadataField = RagDocumentParseResult.class.getDeclaredField("metadataJson").getAnnotation(TableField.class);
|
||||
|
||||
assertNotNull(chunkTable);
|
||||
assertNotNull(parseResultTable);
|
||||
assertTrue(chunkTable.autoResultMap());
|
||||
assertTrue(parseResultTable.autoResultMap());
|
||||
assertNotNull(chunkMetadataField);
|
||||
assertNotNull(parseResultMetadataField);
|
||||
assertEquals(PgJsonbStringTypeHandler.class, chunkMetadataField.typeHandler());
|
||||
assertEquals(PgJsonbStringTypeHandler.class, parseResultMetadataField.typeHandler());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,8 +10,9 @@ import com.bruce.common.service.ISysAttachmentService;
|
||||
import com.bruce.rag.dto.request.RagDocumentParseRequest;
|
||||
import com.bruce.rag.dto.response.RagDocumentParseResponse;
|
||||
import com.bruce.rag.entity.RagDocument;
|
||||
import com.bruce.rag.mapper.RagDocumentMapper;
|
||||
import com.bruce.rag.service.IRagDocumentParseResultService;
|
||||
import com.bruce.rag.enums.RagParseStatusEnum;
|
||||
import com.bruce.rag.service.IRagDocumentService;
|
||||
import com.bruce.rag.service.impl.RagDocumentParseServiceImpl;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
@@ -32,6 +33,7 @@ import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.times;
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.mockito.Mockito.mock;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class RagDocumentParseServiceImplTests {
|
||||
@@ -40,7 +42,7 @@ class RagDocumentParseServiceImplTests {
|
||||
private Path tempDir;
|
||||
|
||||
@Mock
|
||||
private IRagDocumentService ragDocumentService;
|
||||
private RagDocumentMapper ragDocumentMapper;
|
||||
|
||||
@Mock
|
||||
private ISysAttachmentService sysAttachmentService;
|
||||
@@ -68,15 +70,16 @@ class RagDocumentParseServiceImplTests {
|
||||
attachmentProperties.setBasePath(tempDir.toString());
|
||||
DocumentParser parser = new FixedDocumentParser("people profiles");
|
||||
RagDocumentParseServiceImpl service = new RagDocumentParseServiceImpl(
|
||||
ragDocumentService,
|
||||
ragDocumentMapper,
|
||||
sysAttachmentService,
|
||||
attachmentProperties,
|
||||
new DocumentParserFactory(List.of(parser))
|
||||
new DocumentParserFactory(List.of(parser)),
|
||||
mock(IRagDocumentParseResultService.class)
|
||||
);
|
||||
|
||||
when(ragDocumentService.getById(1001L)).thenReturn(document);
|
||||
when(ragDocumentMapper.selectById(1001L)).thenReturn(document);
|
||||
when(sysAttachmentService.getById(3003L)).thenReturn(attachment);
|
||||
when(ragDocumentService.updateById(any(RagDocument.class))).thenReturn(true);
|
||||
when(ragDocumentMapper.updateById(any(RagDocument.class))).thenReturn(1);
|
||||
|
||||
RagDocumentParseResponse response = service.parse(1001L);
|
||||
|
||||
@@ -86,7 +89,7 @@ class RagDocumentParseServiceImplTests {
|
||||
assertEquals("fixed", response.getMetadata().get("parser"));
|
||||
|
||||
ArgumentCaptor<RagDocument> captor = ArgumentCaptor.forClass(RagDocument.class);
|
||||
verify(ragDocumentService, times(2)).updateById(captor.capture());
|
||||
verify(ragDocumentMapper, times(2)).updateById(captor.capture());
|
||||
List<RagDocument> updates = captor.getAllValues();
|
||||
assertEquals(RagParseStatusEnum.PARSING.name(), updates.get(0).getParseStatus());
|
||||
assertEquals(RagParseStatusEnum.PARSED.name(), updates.get(1).getParseStatus());
|
||||
@@ -115,17 +118,18 @@ class RagDocumentParseServiceImplTests {
|
||||
AttachmentProperties attachmentProperties = new AttachmentProperties();
|
||||
attachmentProperties.setBasePath(tempDir.toString());
|
||||
RagDocumentParseServiceImpl service = new RagDocumentParseServiceImpl(
|
||||
ragDocumentService,
|
||||
ragDocumentMapper,
|
||||
sysAttachmentService,
|
||||
attachmentProperties,
|
||||
new DocumentParserFactory(List.of(new FixedDocumentParser("batch profiles")))
|
||||
new DocumentParserFactory(List.of(new FixedDocumentParser("batch profiles"))),
|
||||
mock(IRagDocumentParseResultService.class)
|
||||
);
|
||||
RagDocumentParseRequest request = new RagDocumentParseRequest();
|
||||
request.setDocumentIds(List.of(1002L));
|
||||
|
||||
when(ragDocumentService.getById(1002L)).thenReturn(document);
|
||||
when(ragDocumentMapper.selectById(1002L)).thenReturn(document);
|
||||
when(sysAttachmentService.getById(3004L)).thenReturn(attachment);
|
||||
when(ragDocumentService.updateById(any(RagDocument.class))).thenReturn(true);
|
||||
when(ragDocumentMapper.updateById(any(RagDocument.class))).thenReturn(1);
|
||||
|
||||
List<RagDocumentParseResponse> responses = service.parse(request);
|
||||
|
||||
@@ -139,10 +143,11 @@ class RagDocumentParseServiceImplTests {
|
||||
AttachmentProperties attachmentProperties = new AttachmentProperties();
|
||||
attachmentProperties.setBasePath(tempDir.toString());
|
||||
RagDocumentParseServiceImpl service = new RagDocumentParseServiceImpl(
|
||||
ragDocumentService,
|
||||
ragDocumentMapper,
|
||||
sysAttachmentService,
|
||||
attachmentProperties,
|
||||
new DocumentParserFactory(List.of(new FixedDocumentParser("batch profiles")))
|
||||
new DocumentParserFactory(List.of(new FixedDocumentParser("batch profiles"))),
|
||||
mock(IRagDocumentParseResultService.class)
|
||||
);
|
||||
RagDocumentParseRequest request = new RagDocumentParseRequest();
|
||||
request.setDocumentIds(List.of());
|
||||
|
||||
@@ -9,6 +9,7 @@ import com.bruce.rag.dto.request.RagDocumentSaveRequest;
|
||||
import com.bruce.rag.entity.RagDocument;
|
||||
import com.bruce.rag.enums.RagIndexStatusEnum;
|
||||
import com.bruce.rag.enums.RagParseStatusEnum;
|
||||
import com.bruce.rag.service.IRagDocumentAutoParseService;
|
||||
import com.bruce.rag.service.impl.RagDocumentServiceImpl;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
@@ -18,7 +19,6 @@ import org.mockito.Mock;
|
||||
import org.mockito.Spy;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.springframework.mock.web.MockMultipartFile;
|
||||
import org.springframework.context.ApplicationEventPublisher;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||
@@ -40,7 +40,7 @@ class RagDocumentServiceImplTests {
|
||||
private ISysAttachmentService sysAttachmentService;
|
||||
|
||||
@Mock
|
||||
private ApplicationEventPublisher eventPublisher;
|
||||
private IRagDocumentAutoParseService ragDocumentAutoParseService;
|
||||
|
||||
@Test
|
||||
void batchUploadShouldUseRagSourceTypeAndStoreIdAsSourceId() {
|
||||
|
||||
Reference in New Issue
Block a user