feat(file-summary): 实现文件处理技能链路

This commit is contained in:
2026-06-06 01:20:26 +08:00
parent 51e7c0c007
commit 18d045d487
19 changed files with 604 additions and 9 deletions

View File

@@ -0,0 +1,25 @@
from zipfile import ZipFile
import pytest
from review_agent.file_summary.services.archive import extract_archive
def test_extract_zip_preserves_safe_paths(tmp_path):
    """A zip with a nested, non-escaping member is unpacked under the target.

    The returned list is expected to hold exactly the extracted file path,
    and the file content must round-trip unchanged.
    """
    # Arrange: a one-member archive whose entry lives in a sub-directory.
    zip_file = tmp_path / "safe.zip"
    with ZipFile(zip_file, "w") as writer:
        writer.writestr("dir/a.txt", "content")
    destination = tmp_path / "out"
    expected_member = destination / "dir" / "a.txt"

    # Act
    result = extract_archive(zip_file, destination)

    # Assert: both the reported paths and the bytes on disk.
    assert result == [expected_member]
    assert expected_member.read_text(encoding="utf-8") == "content"
def test_extract_zip_rejects_path_traversal(tmp_path):
    """An archive member that climbs out of the target must be refused.

    Raising ``ValueError`` alone is not enough to prove the extraction is
    safe: the check would still pass if the file were written first and the
    error raised afterwards. The test therefore also verifies that nothing
    exists at the location the ``../`` entry points to.
    """
    archive_path = tmp_path / "evil.zip"
    with ZipFile(archive_path, "w") as archive:
        archive.writestr("../evil.txt", "bad")
    target = tmp_path / "out"

    with pytest.raises(ValueError):
        extract_archive(archive_path, target)

    # "../evil.txt" relative to tmp_path / "out" resolves to tmp_path / "evil.txt".
    assert not (tmp_path / "evil.txt").exists()

View File

@@ -0,0 +1,24 @@
from pathlib import Path
import pytest
from review_agent.file_summary.services.inventory import scan_files_to_items
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
pytestmark = pytest.mark.django_db
def test_scan_files_to_items_preserves_relative_paths(tmp_path, django_user_model):
    """Scanning a root yields one persisted item per file with root-relative paths.

    The first returned item is also expected to start in the SKIPPED
    statistics status.
    """
    # Arrange: one file in a sub-directory and one directly under the root.
    scan_root = tmp_path / "work"
    nested_dir = scan_root / "a"
    nested_dir.mkdir(parents=True)
    (nested_dir / "one.pdf").write_bytes(b"pdf")
    (scan_root / "two.txt").write_text("x", encoding="utf-8")

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-I")

    # Act
    scanned = scan_files_to_items(batch=batch, roots=[scan_root])

    # Assert: returned order, number of rows stored, and initial status.
    relative_paths = [entry.relative_path for entry in scanned]
    assert relative_paths == ["a/one.pdf", "two.txt"]
    assert FileSummaryItem.objects.filter(batch=batch).count() == 2
    first_item = scanned[0]
    assert first_item.statistics_status == FileSummaryItem.StatisticsStatus.SKIPPED

View File

@@ -0,0 +1,66 @@
import pytest
from docx import Document
from openpyxl import Workbook
from pptx import Presentation
from review_agent.file_summary.services.page_count import count_document_pages
from review_agent.file_summary.skills.document_page_count import DocumentPageCountSkill
from review_agent.file_summary.skills.base import WorkflowContext
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
pytestmark = pytest.mark.django_db
def test_count_document_pages_for_office_formats(tmp_path):
    """Check page counting against freshly generated docx, xlsx and pptx files.

    The docx result only has to report one of two accepted statuses, while
    the xlsx and pptx results are pinned to exact page counts.
    """
    # Arrange: an empty Word document.
    word_file = tmp_path / "a.docx"
    Document().save(word_file)

    # A workbook with one additionally created sheet; the test expects 2.
    excel_file = tmp_path / "a.xlsx"
    book = Workbook()
    book.create_sheet("第二页")
    book.save(excel_file)

    # A presentation with a single slide added; the test expects 1.
    slides_file = tmp_path / "a.pptx"
    deck = Presentation()
    layout = deck.slide_layouts[6]
    deck.slides.add_slide(layout)
    deck.save(slides_file)

    # Act + assert, one format at a time (same call order as the files above).
    docx_result = count_document_pages(word_file)
    assert docx_result.status in {"success", "uncertain"}

    xlsx_result = count_document_pages(excel_file)
    assert xlsx_result.page_count == 2

    pptx_result = count_document_pages(slides_file)
    assert pptx_result.page_count == 1
def test_document_page_count_skill_marks_unsupported_and_success(tmp_path, django_user_model):
    """Running the page-count skill updates each item's statistics status.

    The batch holds one xlsx item and one txt item; after the run the xlsx
    item must be SUCCESS and the txt item UNSUPPORTED, with an overall
    successful skill result.
    """
    # Arrange: the two files on disk.
    spreadsheet = tmp_path / "a.xlsx"
    Workbook().save(spreadsheet)
    plain_text = tmp_path / "a.txt"
    plain_text.write_text("x", encoding="utf-8")

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-P")

    def make_item(index, name, kind, location):
        # The relative path equals the file name because both files sit at the top level.
        return FileSummaryItem.objects.create(
            batch=batch,
            file_index=index,
            file_name=name,
            file_type=kind,
            relative_path=name,
            storage_path=str(location),
        )

    xlsx_item = make_item(1, "a.xlsx", "xlsx", spreadsheet)
    txt_item = make_item(2, "a.txt", "txt", plain_text)

    # Act
    outcome = DocumentPageCountSkill().run(WorkflowContext(batch=batch))

    # Reload both rows so the assertions see what the skill persisted.
    xlsx_item.refresh_from_db()
    txt_item.refresh_from_db()

    # Assert
    statuses = FileSummaryItem.StatisticsStatus
    assert outcome.success is True
    assert xlsx_item.statistics_status == statuses.SUCCESS
    assert txt_item.statistics_status == statuses.UNSUPPORTED

View File

@@ -0,0 +1,29 @@
import pytest
from review_agent.file_summary.services.product_detect import detect_product_name
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
pytestmark = pytest.mark.django_db
def test_detect_product_name_from_top_level_directory(django_user_model):
    """The product name is taken from the first path segment of the batch's item.

    Besides the return value, the test expects the name to be stored on the
    batch and the conversation title to be rewritten to include it.
    """
    # Arrange: a conversation with a placeholder title and a single nested item.
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="新对话 06-06")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-D")
    item_fields = {
        "batch": batch,
        "file_index": 1,
        "file_name": "说明书.docx",
        "file_type": "docx",
        "relative_path": "甲型试剂盒/说明书.docx",
        "storage_path": "x",
    }
    FileSummaryItem.objects.create(**item_fields)

    # Act
    detected = detect_product_name(batch)

    # Reload both rows so the assertions read the persisted values.
    batch.refresh_from_db()
    chat.refresh_from_db()

    # Assert
    assert detected == "甲型试剂盒"
    assert batch.product_name == "甲型试剂盒"
    assert chat.title == "甲型试剂盒-文件汇总"

View File

@@ -0,0 +1,27 @@
import pytest
from review_agent.file_summary.skills.base import BaseSkill, SkillResult, WorkflowContext
from review_agent.file_summary.skills.registry import SkillRegistry
class EchoSkill(BaseSkill):
    """Minimal skill for the registry test: reports the id of the context's batch."""

    # Key under which the registry test looks this skill up.
    name = "echo"

    def run(self, context):
        """Return a successful result carrying ``context.batch.id``."""
        payload = {"batch_id": context.batch.id}
        return SkillResult(success=True, data=payload)
@pytest.mark.django_db
def test_skill_registry_executes_registered_skill(django_user_model):
    """A skill registered on the registry can be executed by its name.

    The result produced by the skill must be returned to the caller
    unchanged.
    """
    # Models are imported inside the test, as in the original module.
    from review_agent.models import Conversation, FileSummaryBatch

    # Arrange: a batch for the workflow context and a registry holding EchoSkill.
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-X")
    skills = SkillRegistry()
    skills.register(EchoSkill())
    workflow_context = WorkflowContext(batch=batch)

    # Act
    outcome = skills.execute("echo", workflow_context)

    # Assert
    assert outcome.success is True
    assert outcome.data == {"batch_id": batch.id}