feat(file-summary): 实现文件处理技能链路
This commit is contained in:
25
tests/test_file_summary_archive.py
Normal file
25
tests/test_file_summary_archive.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from zipfile import ZipFile
|
||||
import pytest
|
||||
|
||||
from review_agent.file_summary.services.archive import extract_archive
|
||||
|
||||
|
||||
def test_extract_zip_preserves_safe_paths(tmp_path):
    """A harmless member is extracted under the target, keeping its subdirectory."""
    zip_file = tmp_path / "safe.zip"
    with ZipFile(zip_file, "w") as zf:
        zf.writestr("dir/a.txt", "content")

    out_dir = tmp_path / "out"
    expected_file = out_dir / "dir" / "a.txt"

    result = extract_archive(zip_file, out_dir)

    # The returned list names exactly the file that was written, and its
    # content survived the round trip.
    assert result == [expected_file]
    assert expected_file.read_text(encoding="utf-8") == "content"
|
||||
|
||||
|
||||
def test_extract_zip_rejects_path_traversal(tmp_path):
    """A member whose name climbs out of the target (``../``) must be refused.

    Besides expecting ``ValueError``, the test checks that nothing was written
    at the location the malicious member pointed to, so an implementation that
    writes first and validates afterwards cannot pass.
    """
    archive_path = tmp_path / "evil.zip"
    with ZipFile(archive_path, "w") as archive:
        archive.writestr("../evil.txt", "bad")

    target = tmp_path / "out"
    with pytest.raises(ValueError):
        extract_archive(archive_path, target)

    # "../evil.txt" resolved against ``target`` lands directly in tmp_path.
    assert not (tmp_path / "evil.txt").exists()
|
||||
24
tests/test_file_summary_inventory.py
Normal file
24
tests/test_file_summary_inventory.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
from review_agent.file_summary.services.inventory import scan_files_to_items
|
||||
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
|
||||
|
||||
|
||||
# Module-level marker: applies ``django_db`` to every test in this module.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_scan_files_to_items_preserves_relative_paths(tmp_path, django_user_model):
    """Scanning a root yields one item per file, with paths relative to that root."""
    work_dir = tmp_path / "work"
    nested_dir = work_dir / "a"
    nested_dir.mkdir(parents=True)
    (nested_dir / "one.pdf").write_bytes(b"pdf")
    (work_dir / "two.txt").write_text("x", encoding="utf-8")

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-I",
    )

    scanned = scan_files_to_items(batch=summary_batch, roots=[work_dir])

    relative_paths = [entry.relative_path for entry in scanned]
    assert relative_paths == ["a/one.pdf", "two.txt"]
    # Both files must also have been persisted for this batch.
    assert FileSummaryItem.objects.filter(batch=summary_batch).count() == 2
    first_item = scanned[0]
    assert first_item.statistics_status == FileSummaryItem.StatisticsStatus.SKIPPED
|
||||
66
tests/test_file_summary_page_count.py
Normal file
66
tests/test_file_summary_page_count.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import pytest
|
||||
from docx import Document
|
||||
from openpyxl import Workbook
|
||||
from pptx import Presentation
|
||||
|
||||
from review_agent.file_summary.services.page_count import count_document_pages
|
||||
from review_agent.file_summary.skills.document_page_count import DocumentPageCountSkill
|
||||
from review_agent.file_summary.skills.base import WorkflowContext
|
||||
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
|
||||
|
||||
|
||||
# Module-level marker: applies ``django_db`` to every test in this module.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_count_document_pages_for_office_formats(tmp_path):
    """Exercise page counting on freshly generated docx, xlsx and pptx files."""
    word_file = tmp_path / "a.docx"
    Document().save(word_file)

    # One sheet is added on top of whatever a new Workbook already contains;
    # the expected count below is 2.
    excel_file = tmp_path / "a.xlsx"
    book = Workbook()
    book.create_sheet("第二页")
    book.save(excel_file)

    # A single slide is added to an otherwise empty presentation.
    slides_file = tmp_path / "a.pptx"
    deck = Presentation()
    layout = deck.slide_layouts[6]
    deck.slides.add_slide(layout)
    deck.save(slides_file)

    word_result = count_document_pages(word_file)
    assert word_result.status in {"success", "uncertain"}

    excel_result = count_document_pages(excel_file)
    assert excel_result.page_count == 2

    slides_result = count_document_pages(slides_file)
    assert slides_result.page_count == 1
|
||||
|
||||
|
||||
def test_document_page_count_skill_marks_unsupported_and_success(tmp_path, django_user_model):
    """Run the page-count skill over an xlsx item and a txt item in one batch.

    The xlsx item is expected to end up SUCCESS and the txt item UNSUPPORTED,
    while the skill as a whole still reports success.
    """
    spreadsheet = tmp_path / "a.xlsx"
    book = Workbook()
    book.save(spreadsheet)

    text_file = tmp_path / "a.txt"
    text_file.write_text("x", encoding="utf-8")

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-P",
    )

    def add_item(index, name, kind, path):
        # The relative path equals the bare file name for both fixtures.
        return FileSummaryItem.objects.create(
            batch=summary_batch,
            file_index=index,
            file_name=name,
            file_type=kind,
            relative_path=name,
            storage_path=str(path),
        )

    spreadsheet_item = add_item(1, "a.xlsx", "xlsx", spreadsheet)
    text_item = add_item(2, "a.txt", "txt", text_file)

    skill = DocumentPageCountSkill()
    outcome = skill.run(WorkflowContext(batch=summary_batch))

    # Reload both rows so the assertions see what the skill stored.
    spreadsheet_item.refresh_from_db()
    text_item.refresh_from_db()

    assert outcome.success is True
    assert spreadsheet_item.statistics_status == FileSummaryItem.StatisticsStatus.SUCCESS
    assert text_item.statistics_status == FileSummaryItem.StatisticsStatus.UNSUPPORTED
|
||||
29
tests/test_file_summary_product_detect.py
Normal file
29
tests/test_file_summary_product_detect.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import pytest
|
||||
|
||||
from review_agent.file_summary.services.product_detect import detect_product_name
|
||||
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
|
||||
|
||||
|
||||
# Module-level marker: applies ``django_db`` to every test in this module.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_detect_product_name_from_top_level_directory(django_user_model):
    """The first path segment of an item's relative path is used as the product name.

    The detected name must be returned, stored on the batch, and used to
    retitle the conversation.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="新对话 06-06")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-D",
    )
    item_fields = {
        "file_index": 1,
        "file_name": "说明书.docx",
        "file_type": "docx",
        "relative_path": "甲型试剂盒/说明书.docx",
        "storage_path": "x",
    }
    FileSummaryItem.objects.create(batch=summary_batch, **item_fields)

    detected = detect_product_name(summary_batch)

    # Reload both rows so the assertions see the stored values.
    for record in (summary_batch, chat):
        record.refresh_from_db()

    assert detected == "甲型试剂盒"
    assert summary_batch.product_name == "甲型试剂盒"
    assert chat.title == "甲型试剂盒-文件汇总"
|
||||
27
tests/test_file_summary_skills.py
Normal file
27
tests/test_file_summary_skills.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import pytest
|
||||
|
||||
from review_agent.file_summary.skills.base import BaseSkill, SkillResult, WorkflowContext
|
||||
from review_agent.file_summary.skills.registry import SkillRegistry
|
||||
|
||||
|
||||
class EchoSkill(BaseSkill):
    """Minimal skill for the registry test: reports the id of the batch it receives."""

    name = "echo"

    def run(self, context):
        """Return a successful result carrying ``context.batch.id``."""
        payload = {"batch_id": context.batch.id}
        return SkillResult(success=True, data=payload)
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_skill_registry_executes_registered_skill(django_user_model):
    """A skill registered under its name can be executed through the registry."""
    from review_agent.models import Conversation, FileSummaryBatch

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-X",
    )

    skills = SkillRegistry()
    skills.register(EchoSkill())

    context = WorkflowContext(batch=summary_batch)
    outcome = skills.execute("echo", context)

    assert outcome.success is True
    expected_data = {"batch_id": summary_batch.id}
    assert outcome.data == expected_data
|
||||
Reference in New Issue
Block a user