feat(rag): 落解析快照并打通自动解析链路

This commit is contained in:
2026-05-24 23:17:12 +08:00
parent e51903efbe
commit d8079d6277
16 changed files with 427 additions and 28 deletions

View File

@@ -0,0 +1,49 @@
-- Parse-result snapshot table for RAG documents (PostgreSQL DDL).
-- NOTE: the table is dropped and recreated, so any rows already stored in it
-- are discarded every time this script runs.
DROP TABLE IF EXISTS rag_document_parse_result;

CREATE TABLE rag_document_parse_result (
    -- Surrogate key.
    id            BIGSERIAL    PRIMARY KEY,

    -- Owning rows; both are enforced by the foreign keys declared below.
    store_id      BIGINT       NOT NULL,
    document_id   BIGINT       NOT NULL,

    -- Parsed payload and its optional size statistics.
    parsed_text   TEXT         NOT NULL,
    text_length   INTEGER,
    page_count    INTEGER,
    sheet_count   INTEGER,
    metadata_json JSONB        NOT NULL DEFAULT '{}'::jsonb,
    content_hash  VARCHAR(64),
    parse_version INTEGER      NOT NULL DEFAULT 1,

    -- Row state and audit columns.
    -- NOTE(review): "version" looks like a row revision / optimistic-lock
    -- counter -- confirm against the application code.
    enabled       BOOLEAN      NOT NULL DEFAULT TRUE,
    version       INTEGER      NOT NULL DEFAULT 1,
    create_time   TIMESTAMP,
    update_time   TIMESTAMP,
    remark        VARCHAR(500) DEFAULT '',
    create_by     VARCHAR(64),
    update_by     VARCHAR(64),

    -- At most one parse-result row per document.
    CONSTRAINT uk_rag_parse_result_document
        UNIQUE (document_id),

    -- No ON DELETE / ON UPDATE action is given, so the default (NO ACTION)
    -- applies to both foreign keys.
    CONSTRAINT fk_rag_parse_result_store_id
        FOREIGN KEY (store_id) REFERENCES rag_store (id),
    CONSTRAINT fk_rag_parse_result_document_id
        FOREIGN KEY (document_id) REFERENCES rag_document (id)
);
-- Secondary indexes for rag_document_parse_result.
--
-- There is deliberately no separate index on document_id: the UNIQUE
-- constraint uk_rag_parse_result_document already creates a unique B-tree
-- index on that column, so a second plain index on it would only add write
-- and storage overhead without helping any lookup.
CREATE INDEX idx_rag_parse_result_store_id ON rag_document_parse_result (store_id);
CREATE INDEX idx_rag_parse_result_content_hash ON rag_document_parse_result (content_hash);
CREATE INDEX idx_rag_parse_result_enabled ON rag_document_parse_result (enabled);
-- GIN index over the JSONB metadata column.
CREATE INDEX idx_rag_parse_result_metadata_json ON rag_document_parse_result USING GIN (metadata_json);
-- Catalog comments for rag_document_parse_result.
-- The stored comment strings are kept in Chinese; the trailing "--" notes
-- give an English gloss of each label.
COMMENT ON TABLE rag_document_parse_result IS 'RAG文档解析结果快照表';  -- RAG document parse-result snapshot table

-- Keys.
COMMENT ON COLUMN rag_document_parse_result.id            IS 'ID';  -- ID
COMMENT ON COLUMN rag_document_parse_result.store_id      IS '知识库ID';  -- knowledge base ID
COMMENT ON COLUMN rag_document_parse_result.document_id   IS '文档ID';  -- document ID

-- Parsed payload.
COMMENT ON COLUMN rag_document_parse_result.parsed_text   IS '解析文本';  -- parsed text
COMMENT ON COLUMN rag_document_parse_result.text_length   IS '文本长度';  -- text length
COMMENT ON COLUMN rag_document_parse_result.page_count    IS '页数';  -- page count
COMMENT ON COLUMN rag_document_parse_result.sheet_count   IS '工作表数量';  -- number of worksheets
COMMENT ON COLUMN rag_document_parse_result.metadata_json IS '解析元数据JSON';  -- parse metadata JSON
COMMENT ON COLUMN rag_document_parse_result.content_hash  IS '解析文本哈希';  -- hash of the parsed text
COMMENT ON COLUMN rag_document_parse_result.parse_version IS '解析版本';  -- parse version

-- Row state and audit columns.
COMMENT ON COLUMN rag_document_parse_result.enabled       IS '是否启用';  -- whether enabled
COMMENT ON COLUMN rag_document_parse_result.version       IS '版本';  -- version
COMMENT ON COLUMN rag_document_parse_result.create_time   IS '创建时间';  -- creation time
COMMENT ON COLUMN rag_document_parse_result.update_time   IS '更新时间';  -- update time
COMMENT ON COLUMN rag_document_parse_result.remark        IS '备注';  -- remark
COMMENT ON COLUMN rag_document_parse_result.create_by     IS '创建者';  -- created by
COMMENT ON COLUMN rag_document_parse_result.update_by     IS '更新者';  -- updated by