From d8079d62775dc39e5a3d129b0e32bb8cf790b48e Mon Sep 17 00:00:00 2001 From: bruce Date: Sun, 24 May 2026 23:17:12 +0800 Subject: [PATCH] =?UTF-8?q?feat(rag):=20=E8=90=BD=E8=A7=A3=E6=9E=90?= =?UTF-8?q?=E5=BF=AB=E7=85=A7=E5=B9=B6=E6=89=93=E9=80=9A=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E8=A7=A3=E6=9E=90=E9=93=BE=E8=B7=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/sql/rag_document_parse_result.sql | 49 ++++++++ .../rag/controller/RagDocumentController.java | 9 ++ .../java/com/bruce/rag/entity/RagChunk.java | 5 +- .../rag/entity/RagDocumentParseResult.java | 62 ++++++++++ .../mapper/RagDocumentParseResultMapper.java | 9 ++ .../service/IRagDocumentAutoParseService.java | 8 ++ .../IRagDocumentParseResultService.java | 14 +++ .../impl/RagDocumentAutoParseServiceImpl.java | 37 ++++++ .../impl/RagDocumentChunkServiceImpl.java | 11 +- .../RagDocumentParseResultServiceImpl.java | 109 ++++++++++++++++++ .../impl/RagDocumentParseServiceImpl.java | 25 +++- .../service/impl/RagDocumentServiceImpl.java | 8 +- .../typehandler/PgJsonbStringTypeHandler.java | 33 ++++++ .../bruce/rag/RagComponentStructureTests.java | 41 +++++++ .../rag/RagDocumentParseServiceImplTests.java | 31 ++--- .../rag/RagDocumentServiceImplTests.java | 4 +- 16 files changed, 427 insertions(+), 28 deletions(-) create mode 100644 script/sql/rag_document_parse_result.sql create mode 100644 src/main/java/com/bruce/rag/entity/RagDocumentParseResult.java create mode 100644 src/main/java/com/bruce/rag/mapper/RagDocumentParseResultMapper.java create mode 100644 src/main/java/com/bruce/rag/service/IRagDocumentAutoParseService.java create mode 100644 src/main/java/com/bruce/rag/service/IRagDocumentParseResultService.java create mode 100644 src/main/java/com/bruce/rag/service/impl/RagDocumentAutoParseServiceImpl.java create mode 100644 src/main/java/com/bruce/rag/service/impl/RagDocumentParseResultServiceImpl.java create mode 100644 src/main/java/com/bruce/rag/typehandler/PgJsonbStringTypeHandler.java diff --git a/script/sql/rag_document_parse_result.sql b/script/sql/rag_document_parse_result.sql new file mode 100644 index 0000000..6745703 --- /dev/null +++ b/script/sql/rag_document_parse_result.sql @@ -0,0 +1,49 @@ +DROP TABLE IF EXISTS rag_document_parse_result; + +CREATE TABLE rag_document_parse_result ( + id BIGSERIAL PRIMARY KEY, + store_id BIGINT NOT NULL, + document_id BIGINT NOT NULL, + parsed_text TEXT NOT NULL, + text_length INTEGER, + page_count INTEGER, + sheet_count INTEGER, + metadata_json JSONB NOT NULL DEFAULT '{}'::jsonb, + content_hash VARCHAR(64), + parse_version INTEGER NOT NULL DEFAULT 1, + enabled BOOLEAN NOT NULL DEFAULT TRUE, + version INTEGER NOT NULL DEFAULT 1, + create_time TIMESTAMP, + update_time TIMESTAMP, + remark VARCHAR(500) DEFAULT '', + create_by VARCHAR(64), + update_by VARCHAR(64), + CONSTRAINT uk_rag_parse_result_document UNIQUE (document_id), + CONSTRAINT fk_rag_parse_result_store_id FOREIGN KEY (store_id) REFERENCES rag_store (id), + CONSTRAINT fk_rag_parse_result_document_id FOREIGN KEY (document_id) REFERENCES rag_document (id) +); + +CREATE INDEX idx_rag_parse_result_store_id ON rag_document_parse_result (store_id); +CREATE INDEX idx_rag_parse_result_document_id ON rag_document_parse_result (document_id); +CREATE INDEX idx_rag_parse_result_content_hash ON rag_document_parse_result (content_hash); +CREATE INDEX idx_rag_parse_result_enabled ON rag_document_parse_result (enabled); +CREATE INDEX idx_rag_parse_result_metadata_json ON rag_document_parse_result USING GIN (metadata_json); + +COMMENT ON TABLE rag_document_parse_result IS 'RAG文档解析结果快照表'; +COMMENT ON COLUMN rag_document_parse_result.id IS 'ID'; +COMMENT ON COLUMN rag_document_parse_result.store_id IS '知识库ID'; +COMMENT ON COLUMN rag_document_parse_result.document_id IS '文档ID'; +COMMENT ON COLUMN rag_document_parse_result.parsed_text IS '解析文本'; +COMMENT ON COLUMN rag_document_parse_result.text_length IS '文本长度'; +COMMENT ON COLUMN rag_document_parse_result.page_count IS '页数'; +COMMENT ON COLUMN rag_document_parse_result.sheet_count IS '工作表数量'; +COMMENT ON COLUMN rag_document_parse_result.metadata_json IS '解析元数据JSON'; +COMMENT ON COLUMN rag_document_parse_result.content_hash IS '解析文本哈希'; +COMMENT ON COLUMN rag_document_parse_result.parse_version IS '解析版本'; +COMMENT ON COLUMN rag_document_parse_result.enabled IS '是否启用'; +COMMENT ON COLUMN rag_document_parse_result.version IS '版本'; +COMMENT ON COLUMN rag_document_parse_result.create_time IS '创建时间'; +COMMENT ON COLUMN rag_document_parse_result.update_time IS '更新时间'; +COMMENT ON COLUMN rag_document_parse_result.remark IS '备注'; +COMMENT ON COLUMN rag_document_parse_result.create_by IS '创建者'; +COMMENT ON COLUMN rag_document_parse_result.update_by IS '更新者'; diff --git a/src/main/java/com/bruce/rag/controller/RagDocumentController.java b/src/main/java/com/bruce/rag/controller/RagDocumentController.java index f41c1af..9f43e8e 100644 --- a/src/main/java/com/bruce/rag/controller/RagDocumentController.java +++ b/src/main/java/com/bruce/rag/controller/RagDocumentController.java @@ -106,6 +106,15 @@ public class RagDocumentController { return RequestResult.success(responses); } + @Operation(summary = "重试解析知识库文档") + @PostMapping("/retryParse") + public RequestResult> retryParse(@RequestBody RagDocumentParseRequest request) { + log.info("RagDocumentController.retryParse start, request={}", request); + List responses = ragDocumentParseService.parse(request); + log.info("RagDocumentController.retryParse success, count={}", responses.size()); + return RequestResult.success(responses); + } + @Operation(summary = "按策略异步切片") @PostMapping("/chunk") public RequestResult chunk(@RequestBody RagDocumentChunkRequest request) { diff --git a/src/main/java/com/bruce/rag/entity/RagChunk.java b/src/main/java/com/bruce/rag/entity/RagChunk.java index fcd3a7a..e718b85 100644 --- a/src/main/java/com/bruce/rag/entity/RagChunk.java +++ b/src/main/java/com/bruce/rag/entity/RagChunk.java @@ -3,6 +3,7 @@ package com.bruce.rag.entity; import com.baomidou.mybatisplus.annotation.TableField; import com.baomidou.mybatisplus.annotation.TableName; import com.bruce.common.domain.model.BaseEntity; +import com.bruce.rag.typehandler.PgJsonbStringTypeHandler; import io.swagger.v3.oas.annotations.media.Schema; import lombok.Data; import lombok.EqualsAndHashCode; @@ -11,7 +12,7 @@ import lombok.NoArgsConstructor; @Data @NoArgsConstructor @EqualsAndHashCode(callSuper = true) -@TableName("rag_chunk") +@TableName(value = "rag_chunk", autoResultMap = true) @Schema(description = "RAG知识切片") public class RagChunk extends BaseEntity { @@ -56,7 +57,7 @@ public class RagChunk extends BaseEntity { private String vectorId; @Schema(description = "切片级扩展元数据JSON") - @TableField("metadata_json") + @TableField(value = "metadata_json", typeHandler = PgJsonbStringTypeHandler.class) private String metadataJson; @Schema(description = "是否启用") diff --git a/src/main/java/com/bruce/rag/entity/RagDocumentParseResult.java b/src/main/java/com/bruce/rag/entity/RagDocumentParseResult.java new file mode 100644 index 0000000..e5b24e0 --- /dev/null +++ b/src/main/java/com/bruce/rag/entity/RagDocumentParseResult.java @@ -0,0 +1,62 @@ +package com.bruce.rag.entity; + +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableName; +import com.bruce.common.domain.model.BaseEntity; +import com.bruce.rag.typehandler.PgJsonbStringTypeHandler; +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; + +@Data +@NoArgsConstructor +@EqualsAndHashCode(callSuper = true) +@TableName(value = "rag_document_parse_result", autoResultMap = true) +@Schema(description = "RAG文档解析结果快照") +public class RagDocumentParseResult extends BaseEntity { + + @Schema(description = "知识库ID") + @TableField("store_id") + private Long storeId; + + @Schema(description = "文档ID") + @TableField("document_id") + private Long documentId; + + @Schema(description = "解析文本") + @TableField("parsed_text") + private String parsedText; + + @Schema(description = "文本长度") + @TableField("text_length") + private Integer textLength; + + @Schema(description = "页数") + @TableField("page_count") + private Integer pageCount; + + @Schema(description = "工作表数量") + @TableField("sheet_count") + private Integer sheetCount; + + @Schema(description = "解析元数据JSON") + @TableField(value = "metadata_json", typeHandler = PgJsonbStringTypeHandler.class) + private String metadataJson; + + @Schema(description = "解析结果哈希") + @TableField("content_hash") + private String contentHash; + + @Schema(description = "解析版本") + @TableField("parse_version") + private Integer parseVersion; + + @Schema(description = "是否启用") + @TableField("enabled") + private Boolean enabled; + + @Schema(description = "备注") + @TableField("remark") + private String remark; +} diff --git a/src/main/java/com/bruce/rag/mapper/RagDocumentParseResultMapper.java b/src/main/java/com/bruce/rag/mapper/RagDocumentParseResultMapper.java new file mode 100644 index 0000000..ff3120e --- /dev/null +++ b/src/main/java/com/bruce/rag/mapper/RagDocumentParseResultMapper.java @@ -0,0 +1,9 @@ +package com.bruce.rag.mapper; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import com.bruce.rag.entity.RagDocumentParseResult; +import org.apache.ibatis.annotations.Mapper; + +@Mapper +public interface RagDocumentParseResultMapper extends BaseMapper { +} diff --git a/src/main/java/com/bruce/rag/service/IRagDocumentAutoParseService.java b/src/main/java/com/bruce/rag/service/IRagDocumentAutoParseService.java new file mode 100644 index 0000000..1ad4374 --- /dev/null +++ b/src/main/java/com/bruce/rag/service/IRagDocumentAutoParseService.java @@ -0,0 +1,8 @@ +package com.bruce.rag.service; + +import java.util.List; + +public interface IRagDocumentAutoParseService { + + void parseUploadedDocuments(List documentIds); +} diff --git a/src/main/java/com/bruce/rag/service/IRagDocumentParseResultService.java b/src/main/java/com/bruce/rag/service/IRagDocumentParseResultService.java new file mode 100644 index 0000000..0bbc9f5 --- /dev/null +++ b/src/main/java/com/bruce/rag/service/IRagDocumentParseResultService.java @@ -0,0 +1,14 @@ +package com.bruce.rag.service; + +import com.baomidou.mybatisplus.extension.service.IService; +import com.bruce.common.document.parse.DocumentParseResult; +import com.bruce.rag.entity.RagDocumentParseResult; + +public interface IRagDocumentParseResultService extends IService { + + RagDocumentParseResult getByDocumentId(Long documentId); + + void saveSnapshot(Long storeId, Long documentId, DocumentParseResult parseResult); + + DocumentParseResult toParseResult(RagDocumentParseResult snapshot); +} diff --git a/src/main/java/com/bruce/rag/service/impl/RagDocumentAutoParseServiceImpl.java b/src/main/java/com/bruce/rag/service/impl/RagDocumentAutoParseServiceImpl.java new file mode 100644 index 0000000..d68893a --- /dev/null +++ b/src/main/java/com/bruce/rag/service/impl/RagDocumentAutoParseServiceImpl.java @@ -0,0 +1,37 @@ +package com.bruce.rag.service.impl; + +import com.bruce.rag.service.IRagDocumentAutoParseService; +import com.bruce.rag.service.IRagDocumentParseService; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.scheduling.annotation.Async; +import org.springframework.stereotype.Service; + +import java.util.List; + +@Slf4j +@Service +@RequiredArgsConstructor +public class RagDocumentAutoParseServiceImpl implements IRagDocumentAutoParseService { + + private final IRagDocumentParseService ragDocumentParseService; + + @Override + @Async + public void parseUploadedDocuments(List documentIds) { + if (documentIds == null || documentIds.isEmpty()) { + return; + } + for (Long documentId : documentIds) { + if (documentId == null) { + continue; + } + try { + ragDocumentParseService.parse(documentId); + } catch (RuntimeException e) { + log.warn("RagDocumentAutoParseServiceImpl.parseUploadedDocuments failed, documentId={}, message={}", + documentId, e.getMessage()); + } + } + } +} diff --git a/src/main/java/com/bruce/rag/service/impl/RagDocumentChunkServiceImpl.java b/src/main/java/com/bruce/rag/service/impl/RagDocumentChunkServiceImpl.java index 729751f..9320be5 100644 --- a/src/main/java/com/bruce/rag/service/impl/RagDocumentChunkServiceImpl.java +++ b/src/main/java/com/bruce/rag/service/impl/RagDocumentChunkServiceImpl.java @@ -5,13 +5,14 @@ import com.bruce.common.document.parse.DocumentParseResult; import com.bruce.rag.dto.request.RagDocumentChunkRequest; import com.bruce.rag.entity.RagChunk; import com.bruce.rag.entity.RagDocument; +import com.bruce.rag.entity.RagDocumentParseResult; import com.bruce.rag.enums.RagChunkStrategyEnum; import com.bruce.rag.parse.Chunker; import com.bruce.rag.parse.ChunkerFactory; import com.bruce.rag.parse.RagChunkCommand; import com.bruce.rag.service.IRagChunkService; import com.bruce.rag.service.IRagDocumentChunkService; -import com.bruce.rag.service.IRagDocumentParseService; +import com.bruce.rag.service.IRagDocumentParseResultService; import com.bruce.rag.service.IRagDocumentService; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -28,7 +29,7 @@ public class RagDocumentChunkServiceImpl implements IRagDocumentChunkService { private final IRagDocumentService ragDocumentService; - private final IRagDocumentParseService ragDocumentParseService; + private final IRagDocumentParseResultService ragDocumentParseResultService; private final ChunkerFactory chunkerFactory; @@ -47,7 +48,11 @@ public class RagDocumentChunkServiceImpl implements IRagDocumentChunkService { log.warn("RagDocumentChunkServiceImpl.chunkAsync document not found, documentId={}", documentId); continue; } - DocumentParseResult parseResult = ragDocumentParseService.parseDocumentResult(documentId); + RagDocumentParseResult snapshot = ragDocumentParseResultService.getByDocumentId(documentId); + if (snapshot == null) { + throw new IllegalStateException("文档尚未生成解析快照,documentId=" + documentId); + } + DocumentParseResult parseResult = ragDocumentParseResultService.toParseResult(snapshot); RagChunkCommand command = new RagChunkCommand(); command.setDocument(document); command.setParseResult(parseResult); diff --git a/src/main/java/com/bruce/rag/service/impl/RagDocumentParseResultServiceImpl.java b/src/main/java/com/bruce/rag/service/impl/RagDocumentParseResultServiceImpl.java new file mode 100644 index 0000000..177eff4 --- /dev/null +++ b/src/main/java/com/bruce/rag/service/impl/RagDocumentParseResultServiceImpl.java @@ -0,0 +1,109 @@ +package com.bruce.rag.service.impl; + +import com.baomidou.mybatisplus.core.toolkit.Wrappers; +import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; +import com.bruce.common.document.parse.DocumentParseResult; +import com.bruce.rag.entity.RagDocumentParseResult; +import com.bruce.rag.mapper.RagDocumentParseResultMapper; +import com.bruce.rag.service.IRagDocumentParseResultService; +import lombok.RequiredArgsConstructor; +import org.springframework.stereotype.Service; +import org.springframework.util.DigestUtils; +import org.springframework.util.StringUtils; +import tools.jackson.core.type.TypeReference; +import tools.jackson.databind.ObjectMapper; + +import java.nio.charset.StandardCharsets; +import java.util.LinkedHashMap; +import java.util.Map; + +@Service +@RequiredArgsConstructor +public class RagDocumentParseResultServiceImpl extends ServiceImpl + implements IRagDocumentParseResultService { + + private final ObjectMapper objectMapper; + + @Override + public RagDocumentParseResult getByDocumentId(Long documentId) { + if (documentId == null) { + return null; + } + return getOne(Wrappers.lambdaQuery() + .eq(RagDocumentParseResult::getDocumentId, documentId) + .last("limit 1")); + } + + @Override + public void saveSnapshot(Long storeId, Long documentId, DocumentParseResult parseResult) { + if (storeId == null || documentId == null || parseResult == null) { + throw new IllegalArgumentException("保存解析快照参数不完整"); + } + RagDocumentParseResult existing = getByDocumentId(documentId); + RagDocumentParseResult snapshot = existing == null ? new RagDocumentParseResult() : existing; + snapshot.setStoreId(storeId); + snapshot.setDocumentId(documentId); + snapshot.setParsedText(parseResult.getText()); + snapshot.setTextLength(parseResult.getTextLength()); + snapshot.setPageCount(parseResult.getPageCount()); + snapshot.setSheetCount(parseResult.getSheetCount()); + snapshot.setMetadataJson(toJson(parseResult.getMetadata())); + snapshot.setContentHash(buildHash(parseResult.getText())); + snapshot.setParseVersion(resolveNextVersion(existing)); + snapshot.setEnabled(Boolean.TRUE); + if (snapshot.getId() == null) { + save(snapshot); + } else { + updateById(snapshot); + } + } + + @Override + public DocumentParseResult toParseResult(RagDocumentParseResult snapshot) { + if (snapshot == null) { + return null; + } + DocumentParseResult result = new DocumentParseResult(); + result.setText(snapshot.getParsedText()); + result.setTextLength(snapshot.getTextLength()); + result.setPageCount(snapshot.getPageCount()); + result.setSheetCount(snapshot.getSheetCount()); + result.setMetadata(fromJson(snapshot.getMetadataJson())); + return result; + } + + private Integer resolveNextVersion(RagDocumentParseResult existing) { + if (existing == null || existing.getParseVersion() == null || existing.getParseVersion() < 1) { + return 1; + } + return existing.getParseVersion() + 1; + } + + private String buildHash(String text) { + if (!StringUtils.hasText(text)) { + return null; + } + return DigestUtils.md5DigestAsHex(text.getBytes(StandardCharsets.UTF_8)); + } + + private String toJson(Map metadata) { + try { + Map payload = metadata == null ? new LinkedHashMap<>() : metadata; + return objectMapper.writeValueAsString(payload); + } catch (Exception e) { + throw new IllegalStateException("解析元数据序列化失败", e); + } + } + + private Map fromJson(String metadataJson) { + if (!StringUtils.hasText(metadataJson)) { + return new LinkedHashMap<>(); + } + try { + return objectMapper.readValue(metadataJson, new TypeReference<>() { + }); + } catch (Exception e) { + throw new IllegalStateException("解析元数据反序列化失败", e); + } + } +} diff --git a/src/main/java/com/bruce/rag/service/impl/RagDocumentParseServiceImpl.java b/src/main/java/com/bruce/rag/service/impl/RagDocumentParseServiceImpl.java index 82c4867..bdd4604 100644 --- a/src/main/java/com/bruce/rag/service/impl/RagDocumentParseServiceImpl.java +++ b/src/main/java/com/bruce/rag/service/impl/RagDocumentParseServiceImpl.java @@ -11,9 +11,11 @@ import com.bruce.common.service.ISysAttachmentService; import com.bruce.rag.dto.request.RagDocumentParseRequest; import com.bruce.rag.dto.response.RagDocumentParseResponse; import com.bruce.rag.entity.RagDocument; +import com.bruce.rag.entity.RagDocumentParseResult; import com.bruce.rag.enums.RagParseStatusEnum; +import com.bruce.rag.mapper.RagDocumentMapper; import com.bruce.rag.service.IRagDocumentParseService; -import com.bruce.rag.service.IRagDocumentService; +import com.bruce.rag.service.IRagDocumentParseResultService; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Service; @@ -28,7 +30,7 @@ import java.util.List; @RequiredArgsConstructor public class RagDocumentParseServiceImpl implements IRagDocumentParseService { - private final IRagDocumentService ragDocumentService; + private final RagDocumentMapper ragDocumentMapper; private final ISysAttachmentService sysAttachmentService; @@ -36,6 +38,8 @@ public class RagDocumentParseServiceImpl implements IRagDocumentParseService { private final DocumentParserFactory documentParserFactory; + private final IRagDocumentParseResultService ragDocumentParseResultService; + @Override public List parse(RagDocumentParseRequest request) { log.info("RagDocumentParseServiceImpl.parse batch start, request={}", request); @@ -49,6 +53,10 @@ public class RagDocumentParseServiceImpl implements IRagDocumentParseService { @Override public DocumentParseResult parseDocumentResult(Long documentId) { + RagDocumentParseResult snapshot = ragDocumentParseResultService.getByDocumentId(documentId); + if (snapshot != null) { + return ragDocumentParseResultService.toParseResult(snapshot); + } return doParse(documentId); } @@ -67,7 +75,7 @@ public class RagDocumentParseServiceImpl implements IRagDocumentParseService { throw new IllegalArgumentException("文档ID不能为空"); } - RagDocument document = ragDocumentService.getById(documentId); + RagDocument document = ragDocumentMapper.selectById(documentId); if (document == null) { throw new IllegalArgumentException("文档不存在,ID: " + documentId); } @@ -85,6 +93,7 @@ public class RagDocumentParseServiceImpl implements IRagDocumentParseService { DocumentParseContext context = buildParseContext(document, attachment); DocumentParser parser = documentParserFactory.resolve(context); DocumentParseResult result = parser.parse(context); + ragDocumentParseResultService.saveSnapshot(document.getStoreId(), documentId, result); updateParseStatus(documentId, RagParseStatusEnum.PARSED, null); return result; } catch (RuntimeException e) { @@ -131,11 +140,19 @@ public class RagDocumentParseServiceImpl implements IRagDocumentParseService { } private void updateParseStatus(Long documentId, RagParseStatusEnum status, String errorMessage) { + RagDocument current = ragDocumentMapper.selectById(documentId); + if (current == null) { + throw new IllegalArgumentException("文档不存在,ID: " + documentId); + } RagDocument update = new RagDocument(); update.setId(documentId); + update.setVersion(current.getVersion()); update.setParseStatus(status.name()); update.setErrorMessage(StringUtils.hasText(errorMessage) ? errorMessage : null); - ragDocumentService.updateById(update); + boolean updated = ragDocumentMapper.updateById(update) > 0; + if (!updated) { + throw new IllegalStateException("更新解析状态失败,文档ID: " + documentId + ", 状态: " + status.name()); + } } private RagDocumentParseResponse toResponse(Long documentId, DocumentParseResult result) { diff --git a/src/main/java/com/bruce/rag/service/impl/RagDocumentServiceImpl.java b/src/main/java/com/bruce/rag/service/impl/RagDocumentServiceImpl.java index 45abcd4..247b07c 100644 --- a/src/main/java/com/bruce/rag/service/impl/RagDocumentServiceImpl.java +++ b/src/main/java/com/bruce/rag/service/impl/RagDocumentServiceImpl.java @@ -10,14 +10,13 @@ import com.bruce.rag.dto.request.RagDocumentQueryRequest; import com.bruce.rag.dto.request.RagDocumentSaveRequest; import com.bruce.rag.dto.response.RagDocumentResponse; import com.bruce.rag.entity.RagDocument; -import com.bruce.rag.event.RagDocumentUploadedEvent; import com.bruce.rag.enums.RagIndexStatusEnum; import com.bruce.rag.enums.RagParseStatusEnum; import com.bruce.rag.mapper.RagDocumentMapper; +import com.bruce.rag.service.IRagDocumentAutoParseService; import com.bruce.rag.service.IRagDocumentService; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.context.ApplicationEventPublisher; import org.springframework.stereotype.Service; import org.springframework.util.StringUtils; @@ -32,7 +31,7 @@ public class RagDocumentServiceImpl extends ServiceImpl listResponses() { @@ -174,8 +173,9 @@ public class RagDocumentServiceImpl extends ServiceImpl documentIds = results.stream() .map(RagDocumentResponse::getId) + .filter(id -> id != null) .toList(); - eventPublisher.publishEvent(new RagDocumentUploadedEvent(documentIds)); + ragDocumentAutoParseService.parseUploadedDocuments(documentIds); } log.info("RagDocumentServiceImpl.batchUpload success, storeId={}, uploaded={}", diff --git a/src/main/java/com/bruce/rag/typehandler/PgJsonbStringTypeHandler.java b/src/main/java/com/bruce/rag/typehandler/PgJsonbStringTypeHandler.java new file mode 100644 index 0000000..456be2c --- /dev/null +++ b/src/main/java/com/bruce/rag/typehandler/PgJsonbStringTypeHandler.java @@ -0,0 +1,33 @@ +package com.bruce.rag.typehandler; + +import org.apache.ibatis.type.BaseTypeHandler; +import org.apache.ibatis.type.JdbcType; + +import java.sql.CallableStatement; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Types; + +public class PgJsonbStringTypeHandler extends BaseTypeHandler { + + @Override + public void setNonNullParameter(PreparedStatement ps, int i, String parameter, JdbcType jdbcType) throws SQLException { + ps.setObject(i, parameter, Types.OTHER); + } + + @Override + public String getNullableResult(ResultSet rs, String columnName) throws SQLException { + return rs.getString(columnName); + } + + @Override + public String getNullableResult(ResultSet rs, int columnIndex) throws SQLException { + return rs.getString(columnIndex); + } + + @Override + public String getNullableResult(CallableStatement cs, int columnIndex) throws SQLException { + return cs.getString(columnIndex); + } +} diff --git a/src/test/java/com/bruce/rag/RagComponentStructureTests.java b/src/test/java/com/bruce/rag/RagComponentStructureTests.java index c9f586a..a839ee1 100644 --- a/src/test/java/com/bruce/rag/RagComponentStructureTests.java +++ b/src/test/java/com/bruce/rag/RagComponentStructureTests.java @@ -3,6 +3,8 @@ package com.bruce.rag; import com.baomidou.mybatisplus.core.mapper.BaseMapper; import com.baomidou.mybatisplus.extension.service.IService; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableName; import com.bruce.common.domain.model.RequestResult; import com.bruce.rag.constant.RagSystemConstants; import com.bruce.rag.controller.RagDocumentController; @@ -19,20 +21,25 @@ import com.bruce.rag.dto.response.RagStoreResponse; import com.bruce.rag.entity.RagChunk; import com.bruce.rag.entity.RagChunkEmbedding; import com.bruce.rag.entity.RagDocument; +import com.bruce.rag.entity.RagDocumentParseResult; import com.bruce.rag.entity.RagStore; import com.bruce.rag.mapper.RagChunkEmbeddingMapper; import com.bruce.rag.mapper.RagChunkMapper; import com.bruce.rag.mapper.RagDocumentMapper; +import com.bruce.rag.mapper.RagDocumentParseResultMapper; import com.bruce.rag.mapper.RagStoreMapper; import com.bruce.rag.service.IRagChunkEmbeddingService; import com.bruce.rag.service.IRagChunkService; import com.bruce.rag.service.IRagDocumentParseService; +import com.bruce.rag.service.IRagDocumentParseResultService; import com.bruce.rag.service.IRagDocumentService; import com.bruce.rag.service.IRagStoreService; import com.bruce.rag.service.impl.RagChunkEmbeddingServiceImpl; import com.bruce.rag.service.impl.RagChunkServiceImpl; +import com.bruce.rag.service.impl.RagDocumentParseResultServiceImpl; import com.bruce.rag.service.impl.RagDocumentServiceImpl; import com.bruce.rag.service.impl.RagStoreServiceImpl; +import com.bruce.rag.typehandler.PgJsonbStringTypeHandler; import org.junit.jupiter.api.Test; import org.springframework.web.bind.annotation.PostMapping; @@ -50,14 +57,17 @@ class RagComponentStructureTests { void ragComponentsShouldReuseMybatisPlusBaseTypes() { assertTrue(BaseMapper.class.isAssignableFrom(RagStoreMapper.class)); assertTrue(BaseMapper.class.isAssignableFrom(RagDocumentMapper.class)); + assertTrue(BaseMapper.class.isAssignableFrom(RagDocumentParseResultMapper.class)); assertTrue(BaseMapper.class.isAssignableFrom(RagChunkMapper.class)); assertTrue(BaseMapper.class.isAssignableFrom(RagChunkEmbeddingMapper.class)); assertTrue(IService.class.isAssignableFrom(IRagStoreService.class)); assertTrue(IService.class.isAssignableFrom(IRagDocumentService.class)); + assertTrue(IService.class.isAssignableFrom(IRagDocumentParseResultService.class)); assertTrue(IService.class.isAssignableFrom(IRagChunkService.class)); assertTrue(IService.class.isAssignableFrom(IRagChunkEmbeddingService.class)); assertTrue(ServiceImpl.class.isAssignableFrom(RagStoreServiceImpl.class)); assertTrue(ServiceImpl.class.isAssignableFrom(RagDocumentServiceImpl.class)); + assertTrue(ServiceImpl.class.isAssignableFrom(RagDocumentParseResultServiceImpl.class)); assertTrue(ServiceImpl.class.isAssignableFrom(RagChunkServiceImpl.class)); assertTrue(ServiceImpl.class.isAssignableFrom(RagChunkEmbeddingServiceImpl.class)); } @@ -173,4 +183,35 @@ class RagComponentStructureTests { assertEquals(Boolean.class, RagChunkEmbedding.class.getDeclaredField("enabled").getType()); assertEquals(String.class, RagChunkEmbedding.class.getDeclaredField("remark").getType()); } + + @Test + void ragParseResultStructureShouldSupportSnapshotMetadata() throws NoSuchFieldException { + assertEquals(Long.class, RagDocumentParseResult.class.getDeclaredField("storeId").getType()); + assertEquals(Long.class, RagDocumentParseResult.class.getDeclaredField("documentId").getType()); + assertEquals(String.class, RagDocumentParseResult.class.getDeclaredField("parsedText").getType()); + assertEquals(Integer.class, RagDocumentParseResult.class.getDeclaredField("textLength").getType()); + assertEquals(Integer.class, RagDocumentParseResult.class.getDeclaredField("pageCount").getType()); + assertEquals(Integer.class, RagDocumentParseResult.class.getDeclaredField("sheetCount").getType()); + assertEquals(String.class, RagDocumentParseResult.class.getDeclaredField("metadataJson").getType()); + assertEquals(String.class, RagDocumentParseResult.class.getDeclaredField("contentHash").getType()); + assertEquals(Integer.class, RagDocumentParseResult.class.getDeclaredField("parseVersion").getType()); + assertEquals(Boolean.class, RagDocumentParseResult.class.getDeclaredField("enabled").getType()); + } + + @Test + void ragMetadataJsonFieldsShouldUseJsonbTypeHandler() throws NoSuchFieldException { + TableName chunkTable = RagChunk.class.getAnnotation(TableName.class); + TableName parseResultTable = RagDocumentParseResult.class.getAnnotation(TableName.class); + TableField chunkMetadataField = RagChunk.class.getDeclaredField("metadataJson").getAnnotation(TableField.class); + TableField parseResultMetadataField = RagDocumentParseResult.class.getDeclaredField("metadataJson").getAnnotation(TableField.class); + + assertNotNull(chunkTable); + assertNotNull(parseResultTable); + assertTrue(chunkTable.autoResultMap()); + assertTrue(parseResultTable.autoResultMap()); + assertNotNull(chunkMetadataField); + assertNotNull(parseResultMetadataField); + assertEquals(PgJsonbStringTypeHandler.class, chunkMetadataField.typeHandler()); + assertEquals(PgJsonbStringTypeHandler.class, parseResultMetadataField.typeHandler()); + } } diff --git a/src/test/java/com/bruce/rag/RagDocumentParseServiceImplTests.java b/src/test/java/com/bruce/rag/RagDocumentParseServiceImplTests.java index 991f436..845e0a9 100644 --- a/src/test/java/com/bruce/rag/RagDocumentParseServiceImplTests.java +++ b/src/test/java/com/bruce/rag/RagDocumentParseServiceImplTests.java @@ -10,8 +10,9 @@ import com.bruce.common.service.ISysAttachmentService; import com.bruce.rag.dto.request.RagDocumentParseRequest; import com.bruce.rag.dto.response.RagDocumentParseResponse; import com.bruce.rag.entity.RagDocument; +import com.bruce.rag.mapper.RagDocumentMapper; +import com.bruce.rag.service.IRagDocumentParseResultService; import com.bruce.rag.enums.RagParseStatusEnum; -import com.bruce.rag.service.IRagDocumentService; import com.bruce.rag.service.impl.RagDocumentParseServiceImpl; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -32,6 +33,7 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.times; import static org.mockito.Mockito.when; +import static org.mockito.Mockito.mock; @ExtendWith(MockitoExtension.class) class RagDocumentParseServiceImplTests { @@ -40,7 +42,7 @@ class RagDocumentParseServiceImplTests { private Path tempDir; @Mock - private IRagDocumentService ragDocumentService; + private RagDocumentMapper ragDocumentMapper; @Mock private ISysAttachmentService sysAttachmentService; @@ -68,15 +70,16 @@ class RagDocumentParseServiceImplTests { attachmentProperties.setBasePath(tempDir.toString()); DocumentParser parser = new FixedDocumentParser("people profiles"); RagDocumentParseServiceImpl service = new RagDocumentParseServiceImpl( - ragDocumentService, + ragDocumentMapper, sysAttachmentService, attachmentProperties, - new DocumentParserFactory(List.of(parser)) + new DocumentParserFactory(List.of(parser)), + mock(IRagDocumentParseResultService.class) ); - when(ragDocumentService.getById(1001L)).thenReturn(document); + when(ragDocumentMapper.selectById(1001L)).thenReturn(document); when(sysAttachmentService.getById(3003L)).thenReturn(attachment); - when(ragDocumentService.updateById(any(RagDocument.class))).thenReturn(true); + when(ragDocumentMapper.updateById(any(RagDocument.class))).thenReturn(1); RagDocumentParseResponse response = service.parse(1001L); @@ -86,7 +89,7 @@ class RagDocumentParseServiceImplTests { assertEquals("fixed", response.getMetadata().get("parser")); ArgumentCaptor captor = ArgumentCaptor.forClass(RagDocument.class); - verify(ragDocumentService, times(2)).updateById(captor.capture()); + verify(ragDocumentMapper, times(2)).updateById(captor.capture()); List updates = captor.getAllValues(); assertEquals(RagParseStatusEnum.PARSING.name(), updates.get(0).getParseStatus()); assertEquals(RagParseStatusEnum.PARSED.name(), updates.get(1).getParseStatus()); @@ -115,17 +118,18 @@ class RagDocumentParseServiceImplTests { AttachmentProperties attachmentProperties = new AttachmentProperties(); attachmentProperties.setBasePath(tempDir.toString()); RagDocumentParseServiceImpl service = new RagDocumentParseServiceImpl( - ragDocumentService, + ragDocumentMapper, sysAttachmentService, attachmentProperties, - new DocumentParserFactory(List.of(new FixedDocumentParser("batch profiles"))) + new DocumentParserFactory(List.of(new FixedDocumentParser("batch profiles"))), + mock(IRagDocumentParseResultService.class) ); RagDocumentParseRequest request = new RagDocumentParseRequest(); request.setDocumentIds(List.of(1002L)); - when(ragDocumentService.getById(1002L)).thenReturn(document); + when(ragDocumentMapper.selectById(1002L)).thenReturn(document); when(sysAttachmentService.getById(3004L)).thenReturn(attachment); - when(ragDocumentService.updateById(any(RagDocument.class))).thenReturn(true); + when(ragDocumentMapper.updateById(any(RagDocument.class))).thenReturn(1); List responses = service.parse(request); @@ -139,10 +143,11 @@ class RagDocumentParseServiceImplTests { AttachmentProperties attachmentProperties = new AttachmentProperties(); attachmentProperties.setBasePath(tempDir.toString()); RagDocumentParseServiceImpl service = new RagDocumentParseServiceImpl( - ragDocumentService, + ragDocumentMapper, sysAttachmentService, attachmentProperties, - new DocumentParserFactory(List.of(new FixedDocumentParser("batch profiles"))) + new DocumentParserFactory(List.of(new FixedDocumentParser("batch profiles"))), + mock(IRagDocumentParseResultService.class) ); RagDocumentParseRequest request = new RagDocumentParseRequest(); request.setDocumentIds(List.of()); diff --git a/src/test/java/com/bruce/rag/RagDocumentServiceImplTests.java b/src/test/java/com/bruce/rag/RagDocumentServiceImplTests.java index f56c39c..7099175 100644 --- a/src/test/java/com/bruce/rag/RagDocumentServiceImplTests.java +++ b/src/test/java/com/bruce/rag/RagDocumentServiceImplTests.java @@ -9,6 +9,7 @@ import com.bruce.rag.dto.request.RagDocumentSaveRequest; import com.bruce.rag.entity.RagDocument; import com.bruce.rag.enums.RagIndexStatusEnum; import com.bruce.rag.enums.RagParseStatusEnum; +import com.bruce.rag.service.IRagDocumentAutoParseService; import com.bruce.rag.service.impl.RagDocumentServiceImpl; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -18,7 +19,6 @@ import org.mockito.Mock; import org.mockito.Spy; import org.mockito.junit.jupiter.MockitoExtension; import org.springframework.mock.web.MockMultipartFile; -import org.springframework.context.ApplicationEventPublisher; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; @@ -40,7 +40,7 @@ class RagDocumentServiceImplTests { private ISysAttachmentService sysAttachmentService; @Mock - private ApplicationEventPublisher eventPublisher; + private IRagDocumentAutoParseService ragDocumentAutoParseService; @Test void batchUploadShouldUseRagSourceTypeAndStoreIdAsSourceId() {