refactor(modules): 拆分多模块工程并收口common基础模块
This commit is contained in:
@@ -0,0 +1,217 @@
|
||||
package com.bruce.rag;
|
||||
|
||||
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
|
||||
import com.baomidou.mybatisplus.extension.service.IService;
|
||||
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
|
||||
import com.baomidou.mybatisplus.annotation.TableField;
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import com.bruce.common.typehandler.PgJsonbStringTypeHandler;
|
||||
import com.bruce.common.domain.model.RequestResult;
|
||||
import com.bruce.rag.constant.RagSystemConstants;
|
||||
import com.bruce.rag.controller.RagDocumentController;
|
||||
import com.bruce.rag.controller.RagStoreController;
|
||||
import com.bruce.rag.dto.request.RagDocumentQueryRequest;
|
||||
import com.bruce.rag.dto.request.RagDocumentParseRequest;
|
||||
import com.bruce.rag.dto.request.RagStoreQueryRequest;
|
||||
import com.bruce.rag.dto.request.RagStoreSaveRequest;
|
||||
import com.bruce.rag.dto.response.RagDocumentParseResponse;
|
||||
import com.bruce.rag.dto.response.RagStoreDocumentOverviewResponse;
|
||||
import com.bruce.rag.dto.response.RagStoreOverviewResponse;
|
||||
import com.bruce.rag.dto.response.RagDocumentResponse;
|
||||
import com.bruce.rag.dto.response.RagStoreResponse;
|
||||
import com.bruce.rag.entity.RagChunk;
|
||||
import com.bruce.rag.entity.RagChunkEmbedding;
|
||||
import com.bruce.rag.entity.RagDocument;
|
||||
import com.bruce.rag.entity.RagDocumentParseResult;
|
||||
import com.bruce.rag.entity.RagStore;
|
||||
import com.bruce.rag.mapper.RagChunkEmbeddingMapper;
|
||||
import com.bruce.rag.mapper.RagChunkMapper;
|
||||
import com.bruce.rag.mapper.RagDocumentMapper;
|
||||
import com.bruce.rag.mapper.RagDocumentParseResultMapper;
|
||||
import com.bruce.rag.mapper.RagStoreMapper;
|
||||
import com.bruce.rag.service.IRagChunkEmbeddingService;
|
||||
import com.bruce.rag.service.IRagChunkService;
|
||||
import com.bruce.rag.service.IRagDocumentParseService;
|
||||
import com.bruce.rag.service.IRagDocumentParseResultService;
|
||||
import com.bruce.rag.service.IRagDocumentService;
|
||||
import com.bruce.rag.service.IRagStoreService;
|
||||
import com.bruce.rag.service.impl.RagChunkEmbeddingServiceImpl;
|
||||
import com.bruce.rag.service.impl.RagChunkServiceImpl;
|
||||
import com.bruce.rag.service.impl.RagDocumentParseResultServiceImpl;
|
||||
import com.bruce.rag.service.impl.RagDocumentServiceImpl;
|
||||
import com.bruce.rag.service.impl.RagStoreServiceImpl;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
class RagComponentStructureTests {
|
||||
|
||||
@Test
|
||||
void ragComponentsShouldReuseMybatisPlusBaseTypes() {
|
||||
assertTrue(BaseMapper.class.isAssignableFrom(RagStoreMapper.class));
|
||||
assertTrue(BaseMapper.class.isAssignableFrom(RagDocumentMapper.class));
|
||||
assertTrue(BaseMapper.class.isAssignableFrom(RagDocumentParseResultMapper.class));
|
||||
assertTrue(BaseMapper.class.isAssignableFrom(RagChunkMapper.class));
|
||||
assertTrue(BaseMapper.class.isAssignableFrom(RagChunkEmbeddingMapper.class));
|
||||
assertTrue(IService.class.isAssignableFrom(IRagStoreService.class));
|
||||
assertTrue(IService.class.isAssignableFrom(IRagDocumentService.class));
|
||||
assertTrue(IService.class.isAssignableFrom(IRagDocumentParseResultService.class));
|
||||
assertTrue(IService.class.isAssignableFrom(IRagChunkService.class));
|
||||
assertTrue(IService.class.isAssignableFrom(IRagChunkEmbeddingService.class));
|
||||
assertTrue(ServiceImpl.class.isAssignableFrom(RagStoreServiceImpl.class));
|
||||
assertTrue(ServiceImpl.class.isAssignableFrom(RagDocumentServiceImpl.class));
|
||||
assertTrue(ServiceImpl.class.isAssignableFrom(RagDocumentParseResultServiceImpl.class));
|
||||
assertTrue(ServiceImpl.class.isAssignableFrom(RagChunkServiceImpl.class));
|
||||
assertTrue(ServiceImpl.class.isAssignableFrom(RagChunkEmbeddingServiceImpl.class));
|
||||
}
|
||||
|
||||
@Test
|
||||
void ragControllersShouldExposeRequestResultAndQueryDtoMethods() throws NoSuchMethodException {
|
||||
Method storeListMethod = RagStoreController.class.getMethod("list");
|
||||
Method storeQueryMethod = RagStoreController.class.getMethod("query", RagStoreQueryRequest.class);
|
||||
Method storeDetailMethod = RagStoreController.class.getMethod("getById", Long.class);
|
||||
Method storeOverviewMethod = RagStoreController.class.getMethod("overview");
|
||||
Method storeDocumentOverviewMethod = RagStoreController.class.getMethod("documentOverview", Long.class);
|
||||
Method storeSaveMethod = RagStoreController.class.getMethod("saveOrUpdate", RagStoreSaveRequest.class);
|
||||
Method storeDeleteMethod = RagStoreController.class.getMethod("deleteById", Long.class);
|
||||
Method storeResponseListMethod = IRagStoreService.class.getMethod("listResponses");
|
||||
Method storeServiceQueryMethod = IRagStoreService.class.getMethod("query", RagStoreQueryRequest.class);
|
||||
Method storeServiceDetailMethod = IRagStoreService.class.getMethod("getResponseById", Long.class);
|
||||
Method storeServiceOverviewMethod = IRagStoreService.class.getMethod("getOverview");
|
||||
Method storeServiceDocumentOverviewMethod = IRagStoreService.class.getMethod("getDocumentOverview", Long.class);
|
||||
Method storeServiceSaveMethod = IRagStoreService.class.getMethod("saveOrUpdate", RagStoreSaveRequest.class);
|
||||
|
||||
Method documentListMethod = RagDocumentController.class.getMethod("list");
|
||||
Method documentQueryMethod = RagDocumentController.class.getMethod("query", RagDocumentQueryRequest.class);
|
||||
Method documentParseMethod = RagDocumentController.class.getMethod("parse", RagDocumentParseRequest.class);
|
||||
Method documentResponseListMethod = IRagDocumentService.class.getMethod("listResponses");
|
||||
Method documentServiceQueryMethod = IRagDocumentService.class.getMethod("query", RagDocumentQueryRequest.class);
|
||||
Method documentParseServiceMethod = IRagDocumentParseService.class.getMethod("parse", RagDocumentParseRequest.class);
|
||||
|
||||
assertEquals(RequestResult.class, storeListMethod.getReturnType());
|
||||
assertEquals(RequestResult.class, storeQueryMethod.getReturnType());
|
||||
assertEquals(RequestResult.class, storeDetailMethod.getReturnType());
|
||||
assertEquals(RequestResult.class, storeOverviewMethod.getReturnType());
|
||||
assertEquals(RequestResult.class, storeDocumentOverviewMethod.getReturnType());
|
||||
assertEquals(RequestResult.class, storeSaveMethod.getReturnType());
|
||||
assertEquals(RequestResult.class, storeDeleteMethod.getReturnType());
|
||||
assertEquals(List.class, storeServiceQueryMethod.getReturnType());
|
||||
assertEquals(RagStoreResponse.class, storeServiceDetailMethod.getReturnType());
|
||||
assertEquals(RagStoreOverviewResponse.class, storeServiceOverviewMethod.getReturnType());
|
||||
assertEquals(RagStoreDocumentOverviewResponse.class, storeServiceDocumentOverviewMethod.getReturnType());
|
||||
assertEquals(boolean.class, storeServiceSaveMethod.getReturnType());
|
||||
assertTrue(storeResponseListMethod.getGenericReturnType().getTypeName().contains("RagStoreResponse"));
|
||||
assertTrue(storeServiceQueryMethod.getGenericReturnType().getTypeName().contains("RagStoreResponse"));
|
||||
assertTrue(storeListMethod.getGenericReturnType().getTypeName().contains("RagStoreResponse"));
|
||||
assertTrue(storeQueryMethod.getGenericReturnType().getTypeName().contains("RagStoreResponse"));
|
||||
assertTrue(storeDetailMethod.getGenericReturnType().getTypeName().contains("RagStoreResponse"));
|
||||
assertTrue(storeOverviewMethod.getGenericReturnType().getTypeName().contains("RagStoreOverviewResponse"));
|
||||
assertTrue(storeDocumentOverviewMethod.getGenericReturnType().getTypeName().contains("RagStoreDocumentOverviewResponse"));
|
||||
assertEquals(RagStoreResponse.class, RagStoreResponse.class.getMethod("fromEntity", RagStore.class).getReturnType());
|
||||
|
||||
assertEquals(RequestResult.class, documentListMethod.getReturnType());
|
||||
assertEquals(RequestResult.class, documentQueryMethod.getReturnType());
|
||||
assertEquals(RequestResult.class, documentParseMethod.getReturnType());
|
||||
assertEquals(List.class, documentServiceQueryMethod.getReturnType());
|
||||
assertEquals(List.class, documentParseServiceMethod.getReturnType());
|
||||
assertTrue(documentResponseListMethod.getGenericReturnType().getTypeName().contains("RagDocumentResponse"));
|
||||
assertTrue(documentServiceQueryMethod.getGenericReturnType().getTypeName().contains("RagDocumentResponse"));
|
||||
assertTrue(documentListMethod.getGenericReturnType().getTypeName().contains("RagDocumentResponse"));
|
||||
assertTrue(documentQueryMethod.getGenericReturnType().getTypeName().contains("RagDocumentResponse"));
|
||||
assertTrue(documentParseMethod.getGenericReturnType().getTypeName().contains("RagDocumentParseResponse"));
|
||||
assertEquals(RagDocumentResponse.class, RagDocumentResponse.class.getMethod("fromEntity", RagDocument.class).getReturnType());
|
||||
}
|
||||
|
||||
@Test
|
||||
void ragDocumentListUrlShouldUseExplicitListAction() throws NoSuchMethodException {
|
||||
Method documentListMethod = RagDocumentController.class.getMethod("list");
|
||||
|
||||
PostMapping postMapping = documentListMethod.getAnnotation(PostMapping.class);
|
||||
|
||||
assertNotNull(postMapping);
|
||||
assertEquals("/list", postMapping.value()[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
void ragSourceTypesAndDocumentRelationShouldExist() throws NoSuchFieldException {
|
||||
Field storeIdField = RagDocument.class.getDeclaredField("storeId");
|
||||
Field attachmentIdField = RagDocument.class.getDeclaredField("attachmentId");
|
||||
|
||||
assertEquals("RAG_STORE", RagSystemConstants.RAG_STORE);
|
||||
assertEquals("RAG_DOCUMENT", RagSystemConstants.RAG_DOCUMENT);
|
||||
assertEquals("RAG", RagSystemConstants.SOURCE_TYPE_RAG);
|
||||
assertEquals(Long.class, storeIdField.getType());
|
||||
assertEquals(Long.class, attachmentIdField.getType());
|
||||
assertTrue(RagStore.class.getSimpleName().contains("RagStore"));
|
||||
assertTrue(RagStoreController.class.getSimpleName().contains("RagStoreController"));
|
||||
assertTrue(RagDocumentController.class.getSimpleName().contains("RagDocumentController"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void ragChunkStructureShouldSupportChunkMetadata() throws NoSuchFieldException {
|
||||
assertEquals(Long.class, RagChunk.class.getDeclaredField("storeId").getType());
|
||||
assertEquals(Long.class, RagChunk.class.getDeclaredField("documentId").getType());
|
||||
assertEquals(Integer.class, RagChunk.class.getDeclaredField("chunkIndex").getType());
|
||||
assertEquals(String.class, RagChunk.class.getDeclaredField("chunkContent").getType());
|
||||
assertEquals(String.class, RagChunk.class.getDeclaredField("chunkSummary").getType());
|
||||
assertEquals(Integer.class, RagChunk.class.getDeclaredField("tokenCount").getType());
|
||||
assertEquals(Integer.class, RagChunk.class.getDeclaredField("pageNumber").getType());
|
||||
assertEquals(String.class, RagChunk.class.getDeclaredField("sectionTitle").getType());
|
||||
assertEquals(String.class, RagChunk.class.getDeclaredField("headingPath").getType());
|
||||
assertEquals(String.class, RagChunk.class.getDeclaredField("vectorId").getType());
|
||||
assertEquals(String.class, RagChunk.class.getDeclaredField("metadataJson").getType());
|
||||
assertEquals(Boolean.class, RagChunk.class.getDeclaredField("enabled").getType());
|
||||
assertEquals(String.class, RagChunk.class.getDeclaredField("remark").getType());
|
||||
}
|
||||
|
||||
@Test
|
||||
void ragChunkEmbeddingStructureShouldSupportPgvectorMetadata() throws NoSuchFieldException {
|
||||
assertEquals(Long.class, RagChunkEmbedding.class.getDeclaredField("storeId").getType());
|
||||
assertEquals(Long.class, RagChunkEmbedding.class.getDeclaredField("documentId").getType());
|
||||
assertEquals(Long.class, RagChunkEmbedding.class.getDeclaredField("chunkId").getType());
|
||||
assertEquals(String.class, RagChunkEmbedding.class.getDeclaredField("embeddingModel").getType());
|
||||
assertEquals(Integer.class, RagChunkEmbedding.class.getDeclaredField("embeddingDimension").getType());
|
||||
assertEquals(String.class, RagChunkEmbedding.class.getDeclaredField("embedding").getType());
|
||||
assertEquals(String.class, RagChunkEmbedding.class.getDeclaredField("contentHash").getType());
|
||||
assertEquals(Boolean.class, RagChunkEmbedding.class.getDeclaredField("enabled").getType());
|
||||
assertEquals(String.class, RagChunkEmbedding.class.getDeclaredField("remark").getType());
|
||||
}
|
||||
|
||||
@Test
|
||||
void ragParseResultStructureShouldSupportSnapshotMetadata() throws NoSuchFieldException {
|
||||
assertEquals(Long.class, RagDocumentParseResult.class.getDeclaredField("storeId").getType());
|
||||
assertEquals(Long.class, RagDocumentParseResult.class.getDeclaredField("documentId").getType());
|
||||
assertEquals(String.class, RagDocumentParseResult.class.getDeclaredField("parsedText").getType());
|
||||
assertEquals(Integer.class, RagDocumentParseResult.class.getDeclaredField("textLength").getType());
|
||||
assertEquals(Integer.class, RagDocumentParseResult.class.getDeclaredField("pageCount").getType());
|
||||
assertEquals(Integer.class, RagDocumentParseResult.class.getDeclaredField("sheetCount").getType());
|
||||
assertEquals(String.class, RagDocumentParseResult.class.getDeclaredField("metadataJson").getType());
|
||||
assertEquals(String.class, RagDocumentParseResult.class.getDeclaredField("contentHash").getType());
|
||||
assertEquals(Integer.class, RagDocumentParseResult.class.getDeclaredField("parseVersion").getType());
|
||||
assertEquals(Boolean.class, RagDocumentParseResult.class.getDeclaredField("enabled").getType());
|
||||
}
|
||||
|
||||
@Test
|
||||
void ragMetadataJsonFieldsShouldUseJsonbTypeHandler() throws NoSuchFieldException {
|
||||
TableName chunkTable = RagChunk.class.getAnnotation(TableName.class);
|
||||
TableName parseResultTable = RagDocumentParseResult.class.getAnnotation(TableName.class);
|
||||
TableField chunkMetadataField = RagChunk.class.getDeclaredField("metadataJson").getAnnotation(TableField.class);
|
||||
TableField parseResultMetadataField = RagDocumentParseResult.class.getDeclaredField("metadataJson").getAnnotation(TableField.class);
|
||||
|
||||
assertNotNull(chunkTable);
|
||||
assertNotNull(parseResultTable);
|
||||
assertTrue(chunkTable.autoResultMap());
|
||||
assertTrue(parseResultTable.autoResultMap());
|
||||
assertNotNull(chunkMetadataField);
|
||||
assertNotNull(parseResultMetadataField);
|
||||
assertEquals(PgJsonbStringTypeHandler.class, chunkMetadataField.typeHandler());
|
||||
assertEquals(PgJsonbStringTypeHandler.class, parseResultMetadataField.typeHandler());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,180 @@
|
||||
package com.bruce.rag;
|
||||
|
||||
import com.bruce.common.config.AttachmentProperties;
|
||||
import com.bruce.common.document.parse.DocumentParseContext;
|
||||
import com.bruce.common.document.parse.DocumentParseResult;
|
||||
import com.bruce.common.document.parse.DocumentParser;
|
||||
import com.bruce.common.document.parse.DocumentParserFactory;
|
||||
import com.bruce.common.domain.entity.SysAttachment;
|
||||
import com.bruce.common.service.ISysAttachmentService;
|
||||
import com.bruce.rag.dto.request.RagDocumentParseRequest;
|
||||
import com.bruce.rag.dto.response.RagDocumentParseResponse;
|
||||
import com.bruce.rag.entity.RagDocument;
|
||||
import com.bruce.rag.mapper.RagDocumentMapper;
|
||||
import com.bruce.rag.service.IRagDocumentParseResultService;
|
||||
import com.bruce.rag.enums.RagParseStatusEnum;
|
||||
import com.bruce.rag.service.impl.RagDocumentParseServiceImpl;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.times;
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.mockito.Mockito.mock;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class RagDocumentParseServiceImplTests {
|
||||
|
||||
@TempDir
|
||||
private Path tempDir;
|
||||
|
||||
@Mock
|
||||
private RagDocumentMapper ragDocumentMapper;
|
||||
|
||||
@Mock
|
||||
private ISysAttachmentService sysAttachmentService;
|
||||
|
||||
@Test
|
||||
void parseShouldUpdateStatusAndReturnParseResponse() throws Exception {
|
||||
Path file = tempDir.resolve("rag").resolve("people.txt");
|
||||
Files.createDirectories(file.getParent());
|
||||
Files.writeString(file, "people profiles");
|
||||
|
||||
RagDocument document = new RagDocument();
|
||||
document.setId(1001L);
|
||||
document.setStoreId(2002L);
|
||||
document.setAttachmentId(3003L);
|
||||
document.setParseStatus(RagParseStatusEnum.UPLOADED.name());
|
||||
|
||||
SysAttachment attachment = new SysAttachment();
|
||||
attachment.setId(3003L);
|
||||
attachment.setOriginalName("people.txt");
|
||||
attachment.setFileSuffix("txt");
|
||||
attachment.setContentType("text/plain");
|
||||
attachment.setFilePath("rag/people.txt");
|
||||
|
||||
AttachmentProperties attachmentProperties = new AttachmentProperties();
|
||||
attachmentProperties.setBasePath(tempDir.toString());
|
||||
DocumentParser parser = new FixedDocumentParser("people profiles");
|
||||
RagDocumentParseServiceImpl service = new RagDocumentParseServiceImpl(
|
||||
ragDocumentMapper,
|
||||
sysAttachmentService,
|
||||
attachmentProperties,
|
||||
new DocumentParserFactory(List.of(parser)),
|
||||
mock(IRagDocumentParseResultService.class)
|
||||
);
|
||||
|
||||
when(ragDocumentMapper.selectById(1001L)).thenReturn(document);
|
||||
when(sysAttachmentService.getById(3003L)).thenReturn(attachment);
|
||||
when(ragDocumentMapper.updateById(any(RagDocument.class))).thenReturn(1);
|
||||
|
||||
RagDocumentParseResponse response = service.parse(1001L);
|
||||
|
||||
assertEquals(1001L, response.getDocumentId());
|
||||
assertEquals(RagParseStatusEnum.PARSED.name(), response.getParseStatus());
|
||||
assertEquals(15, response.getTextLength());
|
||||
assertEquals("fixed", response.getMetadata().get("parser"));
|
||||
|
||||
ArgumentCaptor<RagDocument> captor = ArgumentCaptor.forClass(RagDocument.class);
|
||||
verify(ragDocumentMapper, times(2)).updateById(captor.capture());
|
||||
List<RagDocument> updates = captor.getAllValues();
|
||||
assertEquals(RagParseStatusEnum.PARSING.name(), updates.get(0).getParseStatus());
|
||||
assertEquals(RagParseStatusEnum.PARSED.name(), updates.get(1).getParseStatus());
|
||||
assertTrue(parser.supports(new DocumentParseContext()));
|
||||
}
|
||||
|
||||
@Test
|
||||
void parseShouldSupportBatchRequest() throws Exception {
|
||||
Path file = tempDir.resolve("rag").resolve("batch.txt");
|
||||
Files.createDirectories(file.getParent());
|
||||
Files.writeString(file, "batch profiles");
|
||||
|
||||
RagDocument document = new RagDocument();
|
||||
document.setId(1002L);
|
||||
document.setStoreId(2002L);
|
||||
document.setAttachmentId(3004L);
|
||||
document.setParseStatus(RagParseStatusEnum.UPLOADED.name());
|
||||
|
||||
SysAttachment attachment = new SysAttachment();
|
||||
attachment.setId(3004L);
|
||||
attachment.setOriginalName("batch.txt");
|
||||
attachment.setFileSuffix("txt");
|
||||
attachment.setContentType("text/plain");
|
||||
attachment.setFilePath("rag/batch.txt");
|
||||
|
||||
AttachmentProperties attachmentProperties = new AttachmentProperties();
|
||||
attachmentProperties.setBasePath(tempDir.toString());
|
||||
RagDocumentParseServiceImpl service = new RagDocumentParseServiceImpl(
|
||||
ragDocumentMapper,
|
||||
sysAttachmentService,
|
||||
attachmentProperties,
|
||||
new DocumentParserFactory(List.of(new FixedDocumentParser("batch profiles"))),
|
||||
mock(IRagDocumentParseResultService.class)
|
||||
);
|
||||
RagDocumentParseRequest request = new RagDocumentParseRequest();
|
||||
request.setDocumentIds(List.of(1002L));
|
||||
|
||||
when(ragDocumentMapper.selectById(1002L)).thenReturn(document);
|
||||
when(sysAttachmentService.getById(3004L)).thenReturn(attachment);
|
||||
when(ragDocumentMapper.updateById(any(RagDocument.class))).thenReturn(1);
|
||||
|
||||
List<RagDocumentParseResponse> responses = service.parse(request);
|
||||
|
||||
assertEquals(1, responses.size());
|
||||
assertEquals(1002L, responses.getFirst().getDocumentId());
|
||||
assertEquals(RagParseStatusEnum.PARSED.name(), responses.getFirst().getParseStatus());
|
||||
}
|
||||
|
||||
@Test
|
||||
void parseShouldRejectEmptyDocumentIds() {
|
||||
AttachmentProperties attachmentProperties = new AttachmentProperties();
|
||||
attachmentProperties.setBasePath(tempDir.toString());
|
||||
RagDocumentParseServiceImpl service = new RagDocumentParseServiceImpl(
|
||||
ragDocumentMapper,
|
||||
sysAttachmentService,
|
||||
attachmentProperties,
|
||||
new DocumentParserFactory(List.of(new FixedDocumentParser("batch profiles"))),
|
||||
mock(IRagDocumentParseResultService.class)
|
||||
);
|
||||
RagDocumentParseRequest request = new RagDocumentParseRequest();
|
||||
request.setDocumentIds(List.of());
|
||||
|
||||
assertThrows(IllegalArgumentException.class, () -> service.parse(request));
|
||||
}
|
||||
|
||||
private static class FixedDocumentParser implements DocumentParser {
|
||||
|
||||
private final String text;
|
||||
|
||||
private FixedDocumentParser(String text) {
|
||||
this.text = text;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean supports(DocumentParseContext context) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocumentParseResult parse(DocumentParseContext context) {
|
||||
DocumentParseResult result = new DocumentParseResult();
|
||||
result.setText(text);
|
||||
result.setTextLength(text.length());
|
||||
result.setMetadata(Map.of("parser", "fixed"));
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,164 @@
|
||||
package com.bruce.rag;
|
||||
|
||||
import com.bruce.common.domain.entity.SysAttachment;
|
||||
import com.bruce.common.dto.request.SysAttachmentUploadRequest;
|
||||
import com.bruce.common.service.ISysAttachmentService;
|
||||
import com.bruce.rag.constant.RagSystemConstants;
|
||||
import com.bruce.rag.dto.request.RagDocumentBatchUploadRequest;
|
||||
import com.bruce.rag.dto.request.RagDocumentSaveRequest;
|
||||
import com.bruce.rag.entity.RagDocument;
|
||||
import com.bruce.rag.enums.RagIndexStatusEnum;
|
||||
import com.bruce.rag.enums.RagParseStatusEnum;
|
||||
import com.bruce.rag.service.IRagDocumentAutoParseService;
|
||||
import com.bruce.rag.service.impl.RagDocumentServiceImpl;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.Spy;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.springframework.mock.web.MockMultipartFile;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.Mockito.doAnswer;
|
||||
import static org.mockito.Mockito.doReturn;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class RagDocumentServiceImplTests {
|
||||
|
||||
@Spy
|
||||
@InjectMocks
|
||||
private RagDocumentServiceImpl ragDocumentService;
|
||||
|
||||
@Mock
|
||||
private ISysAttachmentService sysAttachmentService;
|
||||
|
||||
@Mock
|
||||
private IRagDocumentAutoParseService ragDocumentAutoParseService;
|
||||
|
||||
@Test
|
||||
void batchUploadShouldUseRagSourceTypeAndStoreIdAsSourceId() {
|
||||
MockMultipartFile file = new MockMultipartFile(
|
||||
"files",
|
||||
"knowledge.txt",
|
||||
"text/plain",
|
||||
"hello rag".getBytes()
|
||||
);
|
||||
RagDocumentBatchUploadRequest request = new RagDocumentBatchUploadRequest();
|
||||
request.setStoreId(1001L);
|
||||
request.setSourceType(RagSystemConstants.SOURCE_TYPE_RAG);
|
||||
request.setFiles(new MockMultipartFile[]{file});
|
||||
request.setDocumentSummary("批量摘要");
|
||||
request.setRemark("批量备注");
|
||||
|
||||
SysAttachment attachment = new SysAttachment();
|
||||
attachment.setId(2002L);
|
||||
when(sysAttachmentService.upload(any(SysAttachmentUploadRequest.class))).thenReturn(attachment);
|
||||
doAnswer(invocation -> true).when(ragDocumentService).save(any(RagDocument.class));
|
||||
|
||||
var responses = ragDocumentService.batchUpload(request);
|
||||
|
||||
ArgumentCaptor<SysAttachmentUploadRequest> uploadCaptor = ArgumentCaptor.forClass(SysAttachmentUploadRequest.class);
|
||||
verify(sysAttachmentService).upload(uploadCaptor.capture());
|
||||
SysAttachmentUploadRequest uploadRequest = uploadCaptor.getValue();
|
||||
assertEquals(RagSystemConstants.SOURCE_TYPE_RAG, uploadRequest.getSourceType());
|
||||
assertEquals(1001L, uploadRequest.getSourceId());
|
||||
assertEquals(file, uploadRequest.getFile());
|
||||
|
||||
ArgumentCaptor<RagDocument> documentCaptor = ArgumentCaptor.forClass(RagDocument.class);
|
||||
verify(ragDocumentService).save(documentCaptor.capture());
|
||||
RagDocument savedDocument = documentCaptor.getValue();
|
||||
assertEquals(1001L, savedDocument.getStoreId());
|
||||
assertEquals(2002L, savedDocument.getAttachmentId());
|
||||
assertEquals("knowledge.txt", savedDocument.getDocumentTitle());
|
||||
assertEquals("批量摘要", savedDocument.getDocumentSummary());
|
||||
assertEquals(RagParseStatusEnum.UPLOADED.name(), savedDocument.getParseStatus());
|
||||
assertEquals(RagIndexStatusEnum.PENDING.name(), savedDocument.getIndexStatus());
|
||||
assertTrue(savedDocument.getEnabled());
|
||||
assertNull(savedDocument.getErrorMessage());
|
||||
assertEquals("批量备注", savedDocument.getRemark());
|
||||
assertEquals(1, responses.size());
|
||||
assertEquals(RagParseStatusEnum.UPLOADED.name(), responses.getFirst().getParseStatus());
|
||||
assertEquals(RagIndexStatusEnum.PENDING.name(), responses.getFirst().getIndexStatus());
|
||||
}
|
||||
|
||||
@Test
|
||||
void saveOrUpdateShouldWriteAllEditableFields() {
|
||||
RagDocument existingDocument = new RagDocument();
|
||||
existingDocument.setId(3003L);
|
||||
|
||||
RagDocumentSaveRequest request = new RagDocumentSaveRequest();
|
||||
request.setId(3003L);
|
||||
request.setStoreId(1001L);
|
||||
request.setAttachmentId(2002L);
|
||||
request.setDocumentTitle(" 新标题 ");
|
||||
request.setDocumentSummary(" 新摘要 ");
|
||||
request.setParseStatus(RagParseStatusEnum.PARSED.name());
|
||||
request.setIndexStatus(RagIndexStatusEnum.INDEXED.name());
|
||||
request.setEnabled(false);
|
||||
request.setErrorMessage(" 已修复 ");
|
||||
request.setRemark(" 备注信息 ");
|
||||
|
||||
doReturn(existingDocument).when(ragDocumentService).getById(3003L);
|
||||
doReturn(true).when(ragDocumentService).updateById(any(RagDocument.class));
|
||||
|
||||
boolean result = ragDocumentService.saveOrUpdate(request);
|
||||
|
||||
assertTrue(result);
|
||||
ArgumentCaptor<RagDocument> documentCaptor = ArgumentCaptor.forClass(RagDocument.class);
|
||||
verify(ragDocumentService).updateById(documentCaptor.capture());
|
||||
RagDocument savedDocument = documentCaptor.getValue();
|
||||
assertEquals(3003L, savedDocument.getId());
|
||||
assertEquals(1001L, savedDocument.getStoreId());
|
||||
assertEquals(2002L, savedDocument.getAttachmentId());
|
||||
assertEquals("新标题", savedDocument.getDocumentTitle());
|
||||
assertEquals("新摘要", savedDocument.getDocumentSummary());
|
||||
assertEquals(RagParseStatusEnum.PARSED.name(), savedDocument.getParseStatus());
|
||||
assertEquals(RagIndexStatusEnum.INDEXED.name(), savedDocument.getIndexStatus());
|
||||
assertEquals(false, savedDocument.getEnabled());
|
||||
assertEquals("已修复", savedDocument.getErrorMessage());
|
||||
assertEquals("备注信息", savedDocument.getRemark());
|
||||
}
|
||||
|
||||
@Test
|
||||
void saveOrUpdateShouldPreserveExistingFieldsForPartialUpdate() {
|
||||
RagDocument existingDocument = new RagDocument();
|
||||
existingDocument.setId(3003L);
|
||||
existingDocument.setStoreId(1001L);
|
||||
existingDocument.setAttachmentId(2002L);
|
||||
existingDocument.setDocumentTitle("people_profiles.txt");
|
||||
existingDocument.setDocumentSummary("测试人员信息,有多条人员信息");
|
||||
existingDocument.setParseStatus(RagParseStatusEnum.UPLOADED.name());
|
||||
existingDocument.setIndexStatus(RagIndexStatusEnum.PENDING.name());
|
||||
existingDocument.setEnabled(true);
|
||||
existingDocument.setRemark("测试人员信息");
|
||||
|
||||
RagDocumentSaveRequest request = new RagDocumentSaveRequest();
|
||||
request.setId(3003L);
|
||||
request.setStoreId(1001L);
|
||||
request.setDocumentTitle("people_profiles.txt");
|
||||
request.setEnabled(false);
|
||||
|
||||
doReturn(existingDocument).when(ragDocumentService).getById(3003L);
|
||||
doReturn(true).when(ragDocumentService).updateById(any(RagDocument.class));
|
||||
|
||||
boolean result = ragDocumentService.saveOrUpdate(request);
|
||||
|
||||
assertTrue(result);
|
||||
ArgumentCaptor<RagDocument> documentCaptor = ArgumentCaptor.forClass(RagDocument.class);
|
||||
verify(ragDocumentService).updateById(documentCaptor.capture());
|
||||
RagDocument savedDocument = documentCaptor.getValue();
|
||||
assertEquals(2002L, savedDocument.getAttachmentId());
|
||||
assertEquals("测试人员信息,有多条人员信息", savedDocument.getDocumentSummary());
|
||||
assertEquals(RagParseStatusEnum.UPLOADED.name(), savedDocument.getParseStatus());
|
||||
assertEquals(RagIndexStatusEnum.PENDING.name(), savedDocument.getIndexStatus());
|
||||
assertEquals(false, savedDocument.getEnabled());
|
||||
assertEquals("测试人员信息", savedDocument.getRemark());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,126 @@
|
||||
package com.bruce.rag;
|
||||
|
||||
import com.bruce.common.enums.EnableStatusEnum;
|
||||
import com.bruce.rag.dto.response.RagDocumentResponse;
|
||||
import com.bruce.rag.dto.response.RagStoreDocumentOverviewResponse;
|
||||
import com.bruce.rag.dto.response.RagStoreOverviewResponse;
|
||||
import com.bruce.rag.entity.RagStore;
|
||||
import com.bruce.rag.enums.RagIndexStatusEnum;
|
||||
import com.bruce.rag.enums.RagParseStatusEnum;
|
||||
import com.bruce.rag.service.IRagDocumentService;
|
||||
import com.bruce.rag.service.impl.RagStoreServiceImpl;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.Spy;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.mockito.Mockito.doReturn;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class RagStoreOverviewServiceTests {
|
||||
|
||||
@Spy
|
||||
@InjectMocks
|
||||
private RagStoreServiceImpl ragStoreService;
|
||||
|
||||
@Mock
|
||||
private IRagDocumentService ragDocumentService;
|
||||
|
||||
@Test
|
||||
void getOverviewShouldAggregateStoreAndDocumentCounts() {
|
||||
RagStore enabledStore = new RagStore();
|
||||
enabledStore.setId(1L);
|
||||
enabledStore.setStatus(EnableStatusEnum.ENABLED.getLabel());
|
||||
RagStore disabledStore = new RagStore();
|
||||
disabledStore.setId(2L);
|
||||
disabledStore.setStatus("停用");
|
||||
|
||||
when(ragDocumentService.listResponses()).thenReturn(List.of(
|
||||
createDocumentResponse("11", "1", true, RagParseStatusEnum.UPLOADED.name(), RagIndexStatusEnum.PENDING.name(), new Date()),
|
||||
createDocumentResponse("22", "2", false, RagParseStatusEnum.PARSED.name(), RagIndexStatusEnum.INDEXED.name(), new Date())
|
||||
));
|
||||
doReturn(List.of(enabledStore, disabledStore)).when(ragStoreService).list();
|
||||
|
||||
RagStoreOverviewResponse response = ragStoreService.getOverview();
|
||||
|
||||
assertEquals(2, response.getTotalStores());
|
||||
assertEquals(2, response.getTotalDocuments());
|
||||
assertNull(response.getTotalChunks());
|
||||
assertEquals(1, response.getRetrievableStores());
|
||||
}
|
||||
|
||||
@Test
|
||||
void getDocumentOverviewShouldAggregateCurrentStoreDocumentMetrics() {
|
||||
RagStore store = new RagStore();
|
||||
store.setId(1L);
|
||||
store.setStoreName("产品制度库");
|
||||
doReturn(store).when(ragStoreService).getById(1L);
|
||||
when(ragDocumentService.query(org.mockito.ArgumentMatchers.any())).thenReturn(List.of(
|
||||
createDocumentResponse("11", "1", true, RagParseStatusEnum.UPLOADED.name(), RagIndexStatusEnum.PENDING.name(), new Date(1747816496000L)),
|
||||
createDocumentResponse("12", "1", true, RagParseStatusEnum.PARSED.name(), RagIndexStatusEnum.INDEXED.name(), new Date(1747820096000L)),
|
||||
createDocumentResponse("13", "1", false, RagParseStatusEnum.FAILED.name(), RagIndexStatusEnum.FAILED.name(), new Date(1747812896000L))
|
||||
));
|
||||
|
||||
RagStoreDocumentOverviewResponse response = ragStoreService.getDocumentOverview(1L);
|
||||
|
||||
assertEquals(1L, response.getStoreId());
|
||||
assertEquals("产品制度库", response.getStoreName());
|
||||
assertEquals(3, response.getDocumentCount());
|
||||
assertEquals(2, response.getEnabledDocumentCount());
|
||||
assertEquals(1, response.getParsedDocumentCount());
|
||||
assertEquals(1, response.getIndexedDocumentCount());
|
||||
assertEquals(new Date(1747820096000L), response.getLastUploadTime());
|
||||
}
|
||||
|
||||
@Test
|
||||
void getDocumentOverviewShouldQueryDocumentsByStoreIdOnly() {
|
||||
RagStore store = new RagStore();
|
||||
store.setId(1L);
|
||||
store.setStoreName("产品制度库");
|
||||
doReturn(store).when(ragStoreService).getById(1L);
|
||||
when(ragDocumentService.query(org.mockito.ArgumentMatchers.any())).thenReturn(List.of());
|
||||
|
||||
ragStoreService.getDocumentOverview(1L);
|
||||
|
||||
org.mockito.ArgumentCaptor<com.bruce.rag.dto.request.RagDocumentQueryRequest> captor =
|
||||
org.mockito.ArgumentCaptor.forClass(com.bruce.rag.dto.request.RagDocumentQueryRequest.class);
|
||||
org.mockito.Mockito.verify(ragDocumentService).query(captor.capture());
|
||||
assertEquals(1L, captor.getValue().getStoreId());
|
||||
assertNull(captor.getValue().getParseStatus());
|
||||
assertNull(captor.getValue().getIndexStatus());
|
||||
}
|
||||
|
||||
@Test
|
||||
void getDocumentOverviewShouldRejectUnknownStore() {
|
||||
doReturn(null).when(ragStoreService).getById(999L);
|
||||
|
||||
assertThrows(IllegalArgumentException.class, () -> ragStoreService.getDocumentOverview(999L));
|
||||
}
|
||||
|
||||
private RagDocumentResponse createDocumentResponse(
|
||||
String id,
|
||||
String storeId,
|
||||
boolean enabled,
|
||||
String parseStatus,
|
||||
String indexStatus,
|
||||
Date createTime
|
||||
) {
|
||||
RagDocumentResponse response = new RagDocumentResponse();
|
||||
response.setId(Long.valueOf(id));
|
||||
response.setStoreId(Long.valueOf(storeId));
|
||||
response.setEnabled(enabled);
|
||||
response.setParseStatus(parseStatus);
|
||||
response.setIndexStatus(indexStatus);
|
||||
response.setCreateTime(createTime);
|
||||
return response;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
package com.bruce.rag;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.bruce.rag.dto.response.RagDocumentResponse;
|
||||
import com.bruce.rag.dto.response.RagStoreResponse;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
class RagStoreResponseSerializationTests {
|
||||
|
||||
@Test
|
||||
void idShouldSerializeAsStringForFrontendPrecisionSafety() throws Exception {
|
||||
RagStoreResponse response = new RagStoreResponse();
|
||||
response.setId(2057302206052372481L);
|
||||
response.setStoreCode("TEXT-1");
|
||||
response.setStoreName("测试库1");
|
||||
|
||||
String json = new ObjectMapper().writeValueAsString(response);
|
||||
|
||||
assertTrue(json.contains("\"id\":\"2057302206052372481\""));
|
||||
}
|
||||
|
||||
@Test
|
||||
void responseTimeShouldSerializeWithUnifiedFormat() throws Exception {
|
||||
RagStoreResponse storeResponse = new RagStoreResponse();
|
||||
storeResponse.setCreateTime(new Date(1747816496000L));
|
||||
storeResponse.setUpdateTime(new Date(1747816496000L));
|
||||
|
||||
RagDocumentResponse documentResponse = new RagDocumentResponse();
|
||||
documentResponse.setCreateTime(new Date(1747816496000L));
|
||||
documentResponse.setUpdateTime(new Date(1747816496000L));
|
||||
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
String storeJson = objectMapper.writeValueAsString(storeResponse);
|
||||
String documentJson = objectMapper.writeValueAsString(documentResponse);
|
||||
|
||||
assertTrue(storeJson.contains("\"createTime\":\"2025-05-21 16:34:56\""));
|
||||
assertTrue(storeJson.contains("\"updateTime\":\"2025-05-21 16:34:56\""));
|
||||
assertTrue(documentJson.contains("\"createTime\":\"2025-05-21 16:34:56\""));
|
||||
assertTrue(documentJson.contains("\"updateTime\":\"2025-05-21 16:34:56\""));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
package com.bruce.rag;
|
||||
|
||||
import com.bruce.common.enums.EnableStatusEnum;
|
||||
import com.bruce.rag.dto.request.RagStoreSaveRequest;
|
||||
import com.bruce.rag.entity.RagStore;
|
||||
import com.bruce.rag.service.impl.RagStoreServiceImpl;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
|
||||
class RagStoreSaveValidationTests {
|
||||
|
||||
@Test
|
||||
void saveShouldRejectBlankStoreCode() {
|
||||
RagStoreServiceImpl service = new RagStoreServiceImpl();
|
||||
RagStoreSaveRequest request = new RagStoreSaveRequest();
|
||||
request.setStoreName("产品制度库");
|
||||
|
||||
assertThrows(IllegalArgumentException.class, () -> service.validateSaveRequest(request));
|
||||
}
|
||||
|
||||
@Test
|
||||
void saveShouldRejectBlankStoreName() {
|
||||
RagStoreServiceImpl service = new RagStoreServiceImpl();
|
||||
RagStoreSaveRequest request = new RagStoreSaveRequest();
|
||||
request.setStoreCode("PROD_DOC");
|
||||
|
||||
assertThrows(IllegalArgumentException.class, () -> service.validateSaveRequest(request));
|
||||
}
|
||||
|
||||
@Test
|
||||
void saveShouldAcceptMinimalValidRequest() {
|
||||
RagStoreServiceImpl service = new RagStoreServiceImpl();
|
||||
RagStoreSaveRequest request = new RagStoreSaveRequest();
|
||||
request.setStoreCode("PROD_DOC");
|
||||
request.setStoreName("产品制度库");
|
||||
|
||||
assertDoesNotThrow(() -> service.validateSaveRequest(request));
|
||||
}
|
||||
|
||||
@Test
|
||||
void saveShouldDefaultStatusToEnabledEnumLabel() {
|
||||
RagStoreServiceImpl service = new RagStoreServiceImpl();
|
||||
RagStoreSaveRequest request = new RagStoreSaveRequest();
|
||||
request.setStoreCode("PROD_DOC");
|
||||
request.setStoreName("产品制度库");
|
||||
|
||||
RagStore ragStore = service.buildEntity(request);
|
||||
|
||||
assertEquals(EnableStatusEnum.ENABLED.getLabel(), ragStore.getStatus());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
package com.bruce.rag.parse;
|
||||
|
||||
import com.bruce.rag.entity.RagChunk;
|
||||
import com.bruce.rag.enums.RagChunkStrategyEnum;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertSame;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
|
||||
class ChunkerFactoryTests {
|
||||
|
||||
@Test
|
||||
void resolveShouldReturnMatchingChunker() {
|
||||
Chunker supported = new StubChunker(RagChunkStrategyEnum.FIXED_LENGTH);
|
||||
Chunker unsupported = new StubChunker(RagChunkStrategyEnum.DELIMITER);
|
||||
ChunkerFactory factory = new ChunkerFactory(List.of(supported, unsupported));
|
||||
|
||||
Chunker resolved = factory.resolve(RagChunkStrategyEnum.FIXED_LENGTH);
|
||||
|
||||
assertSame(supported, resolved);
|
||||
}
|
||||
|
||||
@Test
|
||||
void resolveShouldRejectUnsupportedStrategy() {
|
||||
ChunkerFactory factory = new ChunkerFactory(List.of(new StubChunker(RagChunkStrategyEnum.FIXED_LENGTH)));
|
||||
|
||||
assertThrows(IllegalArgumentException.class, () -> factory.resolve(RagChunkStrategyEnum.SEMANTIC));
|
||||
}
|
||||
|
||||
private static class StubChunker implements Chunker {
|
||||
|
||||
private final RagChunkStrategyEnum strategy;
|
||||
|
||||
private StubChunker(RagChunkStrategyEnum strategy) {
|
||||
this.strategy = strategy;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean supports(RagChunkStrategyEnum strategy) {
|
||||
return this.strategy == strategy;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<RagChunk> chunk(RagChunkCommand command) {
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package com.bruce.rag.parse;
|
||||
|
||||
import com.bruce.common.document.parse.DocumentParseResult;
|
||||
import com.bruce.rag.entity.RagChunk;
|
||||
import com.bruce.rag.entity.RagDocument;
|
||||
import com.bruce.rag.enums.RagChunkStrategyEnum;
|
||||
import com.bruce.rag.parse.impl.DelimiterChunker;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
class DelimiterChunkerTests {
|
||||
|
||||
@Test
|
||||
void chunkShouldSplitByDelimiterAndIgnoreBlankSegments() {
|
||||
DelimiterChunker chunker = new DelimiterChunker();
|
||||
|
||||
RagChunkCommand command = new RagChunkCommand();
|
||||
command.setDocument(buildDocument());
|
||||
command.setParseResult(buildParseResult("第一段。第二段。。第三段"));
|
||||
command.setChunkStrategy(RagChunkStrategyEnum.DELIMITER.getValue());
|
||||
command.setDelimiter("。");
|
||||
|
||||
List<RagChunk> chunks = chunker.chunk(command);
|
||||
|
||||
assertEquals(3, chunks.size());
|
||||
assertEquals("第一段", chunks.get(0).getChunkContent());
|
||||
assertEquals("第二段", chunks.get(1).getChunkContent());
|
||||
assertEquals("第三段", chunks.get(2).getChunkContent());
|
||||
assertEquals(0, chunks.get(0).getChunkIndex());
|
||||
assertEquals(1, chunks.get(1).getChunkIndex());
|
||||
assertEquals(2, chunks.get(2).getChunkIndex());
|
||||
}
|
||||
|
||||
@Test
|
||||
void chunkShouldReturnEmptyListForBlankText() {
|
||||
DelimiterChunker chunker = new DelimiterChunker();
|
||||
|
||||
RagChunkCommand command = new RagChunkCommand();
|
||||
command.setDocument(buildDocument());
|
||||
command.setParseResult(buildParseResult(" "));
|
||||
command.setChunkStrategy(RagChunkStrategyEnum.DELIMITER.getValue());
|
||||
command.setDelimiter("。");
|
||||
|
||||
assertTrue(chunker.chunk(command).isEmpty());
|
||||
}
|
||||
|
||||
private static RagDocument buildDocument() {
|
||||
RagDocument document = new RagDocument();
|
||||
document.setId(66L);
|
||||
document.setStoreId(55L);
|
||||
return document;
|
||||
}
|
||||
|
||||
private static DocumentParseResult buildParseResult(String text) {
|
||||
DocumentParseResult result = new DocumentParseResult();
|
||||
result.setText(text);
|
||||
result.setTextLength(text.length());
|
||||
return result;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
package com.bruce.rag.parse;
|
||||
|
||||
import com.bruce.common.document.parse.DocumentParseResult;
|
||||
import com.bruce.rag.entity.RagChunk;
|
||||
import com.bruce.rag.entity.RagDocument;
|
||||
import com.bruce.rag.enums.RagChunkStrategyEnum;
|
||||
import com.bruce.rag.parse.impl.FixedLengthChunker;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
class FixedLengthChunkerTests {
|
||||
|
||||
@Test
|
||||
void chunkShouldSplitTextByChunkSizeAndOverlap() {
|
||||
FixedLengthChunker chunker = new FixedLengthChunker();
|
||||
|
||||
RagChunkCommand command = new RagChunkCommand();
|
||||
command.setDocument(buildDocument());
|
||||
command.setParseResult(buildParseResult("abcdefghij"));
|
||||
command.setChunkStrategy(RagChunkStrategyEnum.FIXED_LENGTH.getValue());
|
||||
command.setChunkSize(4);
|
||||
command.setChunkOverlap(1);
|
||||
|
||||
List<RagChunk> chunks = chunker.chunk(command);
|
||||
|
||||
assertEquals(3, chunks.size());
|
||||
assertEquals("abcd", chunks.get(0).getChunkContent());
|
||||
assertEquals("defg", chunks.get(1).getChunkContent());
|
||||
assertEquals("ghij", chunks.get(2).getChunkContent());
|
||||
assertEquals(0, chunks.get(0).getChunkIndex());
|
||||
assertEquals(1, chunks.get(1).getChunkIndex());
|
||||
assertEquals(2, chunks.get(2).getChunkIndex());
|
||||
assertEquals(99L, chunks.get(0).getDocumentId());
|
||||
assertEquals(88L, chunks.get(0).getStoreId());
|
||||
assertTrue(Boolean.TRUE.equals(chunks.get(0).getEnabled()));
|
||||
}
|
||||
|
||||
@Test
|
||||
void chunkShouldReturnEmptyListForBlankText() {
|
||||
FixedLengthChunker chunker = new FixedLengthChunker();
|
||||
|
||||
RagChunkCommand command = new RagChunkCommand();
|
||||
command.setDocument(buildDocument());
|
||||
command.setParseResult(buildParseResult(" "));
|
||||
command.setChunkStrategy(RagChunkStrategyEnum.FIXED_LENGTH.getValue());
|
||||
command.setChunkSize(4);
|
||||
command.setChunkOverlap(1);
|
||||
|
||||
assertTrue(chunker.chunk(command).isEmpty());
|
||||
}
|
||||
|
||||
private static RagDocument buildDocument() {
|
||||
RagDocument document = new RagDocument();
|
||||
document.setId(99L);
|
||||
document.setStoreId(88L);
|
||||
return document;
|
||||
}
|
||||
|
||||
private static DocumentParseResult buildParseResult(String text) {
|
||||
DocumentParseResult result = new DocumentParseResult();
|
||||
result.setText(text);
|
||||
result.setTextLength(text.length());
|
||||
return result;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user