From 855afcdee3f82b3a88b8001279be548abe8f8cf9 Mon Sep 17 00:00:00 2001 From: bruce Date: Sat, 6 Jun 2026 01:11:11 +0800 Subject: [PATCH 1/7] =?UTF-8?q?feat(file-summary):=20=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=B1=87=E6=80=BB=E6=95=B0=E6=8D=AE=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 12 + pytest.ini | 3 + requirements.txt | 7 + ...mmarybatch_exportedsummaryfile_and_more.py | 481 ++++++++++++++++++ review_agent/models.py | 290 +++++++++++ tests/test_file_summary_models.py | 113 ++++ 6 files changed, 906 insertions(+) create mode 100644 pytest.ini create mode 100644 review_agent/migrations/0002_fileattachment_filesummarybatch_exportedsummaryfile_and_more.py create mode 100644 tests/test_file_summary_models.py diff --git a/README.md b/README.md index de78a58..3f52755 100644 --- a/README.md +++ b/README.md @@ -18,3 +18,15 @@ python manage.py runserver - 登录页:http://127.0.0.1:8000/login/ - 首页:http://127.0.0.1:8000/ - 管理后台:http://127.0.0.1:8000/admin/ + +## 文件汇总依赖 + +自动汇总文件目录与页数功能使用轻量 Python 库读取 PDF、Word、Excel、PowerPoint 文件。 +Docker 或生产环境如需处理 `.7z` 与 `.rar` 压缩包,还需要安装系统 `7z`/`p7zip` +命令,并确认以下命令可用: + +```bash +7z +``` + +LibreOffice 不是必需依赖,仅作为未来增强老格式文档解析的可选能力。 diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..7a4fb9b --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +DJANGO_SETTINGS_MODULE = config.settings +python_files = tests.py test_*.py *_tests.py diff --git a/requirements.txt b/requirements.txt index af9b7e1..f26a954 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,8 @@ Django>=5.0,<6.0 +pypdf>=5.0 +python-docx>=1.1 +python-pptx>=1.0 +openpyxl>=3.1 +xlrd>=2.0 +olefile>=0.47 +py7zr>=0.21 diff --git a/review_agent/migrations/0002_fileattachment_filesummarybatch_exportedsummaryfile_and_more.py b/review_agent/migrations/0002_fileattachment_filesummarybatch_exportedsummaryfile_and_more.py new file mode 100644 index 
0000000..10ef36a --- /dev/null +++ b/review_agent/migrations/0002_fileattachment_filesummarybatch_exportedsummaryfile_and_more.py @@ -0,0 +1,481 @@ +# Generated by Django 5.2.14 on 2026-06-05 17:09 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("review_agent", "0001_initial"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="FileAttachment", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("original_name", models.CharField(max_length=255)), + ("version_no", models.PositiveIntegerField(default=1)), + ("is_active", models.BooleanField(default=True)), + ("storage_path", models.CharField(max_length=500)), + ("file_size", models.BigIntegerField(default=0)), + ( + "content_type", + models.CharField(blank=True, default="", max_length=120), + ), + ( + "upload_status", + models.CharField( + choices=[ + ("uploaded", "已上传"), + ("bound", "已绑定"), + ("deleted", "已删除"), + ], + default="uploaded", + max_length=20, + ), + ), + ("created_at", models.DateTimeField(auto_now_add=True)), + ( + "conversation", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="file_attachments", + to="review_agent.conversation", + ), + ), + ( + "user", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="review_file_attachments", + to=settings.AUTH_USER_MODEL, + ), + ), + ], + options={ + "db_table": "ra_file_attachment", + "ordering": ["-created_at", "-id"], + }, + ), + migrations.CreateModel( + name="FileSummaryBatch", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("batch_no", models.CharField(max_length=64, unique=True)), + ( + "product_name", + 
models.CharField(blank=True, default="", max_length=200), + ), + ( + "status", + models.CharField( + choices=[ + ("pending", "待执行"), + ("running", "执行中"), + ("success", "成功"), + ("failed", "失败"), + ], + default="pending", + max_length=20, + ), + ), + ("total_files", models.IntegerField(default=0)), + ("supported_files", models.IntegerField(default=0)), + ("success_files", models.IntegerField(default=0)), + ("failed_files", models.IntegerField(default=0)), + ("unsupported_files", models.IntegerField(default=0)), + ("uncertain_files", models.IntegerField(default=0)), + ("total_pages", models.IntegerField(default=0)), + ("work_dir", models.CharField(blank=True, default="", max_length=500)), + ("error_message", models.TextField(blank=True, default="")), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("started_at", models.DateTimeField(blank=True, null=True)), + ("finished_at", models.DateTimeField(blank=True, null=True)), + ( + "conversation", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="file_summary_batches", + to="review_agent.conversation", + ), + ), + ( + "trigger_message", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="triggered_file_summary_batches", + to="review_agent.message", + ), + ), + ( + "user", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="review_file_summary_batches", + to=settings.AUTH_USER_MODEL, + ), + ), + ], + options={ + "db_table": "ra_file_summary_batch", + "ordering": ["-created_at", "-id"], + }, + ), + migrations.CreateModel( + name="ExportedSummaryFile", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "export_type", + models.CharField( + choices=[("markdown", "Markdown"), ("excel", "Excel")], + max_length=20, + ), + ), + ("file_name", models.CharField(max_length=255)), + ("storage_path", 
models.CharField(max_length=500)), + ( + "status", + models.CharField( + choices=[("success", "成功"), ("failed", "失败")], + default="success", + max_length=20, + ), + ), + ("error_message", models.TextField(blank=True, default="")), + ("created_at", models.DateTimeField(auto_now_add=True)), + ( + "batch", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="exports", + to="review_agent.filesummarybatch", + ), + ), + ], + options={ + "db_table": "ra_exported_summary_file", + "ordering": ["-created_at", "-id"], + }, + ), + migrations.CreateModel( + name="FileSummaryBatchAttachment", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "source_role", + models.CharField( + choices=[("archive", "压缩包"), ("multi_file", "多文件")], + default="multi_file", + max_length=20, + ), + ), + ("created_at", models.DateTimeField(auto_now_add=True)), + ( + "attachment", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="batch_bindings", + to="review_agent.fileattachment", + ), + ), + ( + "batch", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="batch_attachments", + to="review_agent.filesummarybatch", + ), + ), + ], + options={ + "db_table": "ra_file_summary_batch_attachment", + }, + ), + migrations.CreateModel( + name="FileSummaryItem", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("file_index", models.PositiveIntegerField()), + ( + "directory_level", + models.CharField(blank=True, default="", max_length=300), + ), + ("file_name", models.CharField(max_length=255)), + ("file_type", models.CharField(max_length=20)), + ("relative_path", models.CharField(max_length=500)), + ("storage_path", models.CharField(max_length=500)), + ("page_count", models.IntegerField(blank=True, null=True)), + ( + "statistics_status", + 
models.CharField( + choices=[ + ("success", "成功"), + ("failed", "失败"), + ("unsupported", "不支持"), + ("uncertain", "不确定"), + ("skipped", "跳过"), + ], + default="skipped", + max_length=20, + ), + ), + ("retry_count", models.PositiveIntegerField(default=0)), + ("error_message", models.TextField(blank=True, default="")), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ( + "batch", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="items", + to="review_agent.filesummarybatch", + ), + ), + ], + options={ + "db_table": "ra_file_summary_item", + "ordering": ["file_index", "id"], + }, + ), + migrations.CreateModel( + name="WorkflowEvent", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("event_type", models.CharField(max_length=40)), + ("payload", models.JSONField(default=dict)), + ("created_at", models.DateTimeField(auto_now_add=True)), + ( + "batch", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="events", + to="review_agent.filesummarybatch", + ), + ), + ], + options={ + "db_table": "ra_workflow_event", + "ordering": ["id"], + }, + ), + migrations.CreateModel( + name="WorkflowNodeRun", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("node_code", models.CharField(max_length=40)), + ("node_name", models.CharField(max_length=80)), + ( + "status", + models.CharField( + choices=[ + ("pending", "等待中"), + ("running", "执行中"), + ("retrying", "重试中"), + ("success", "成功"), + ("failed", "失败"), + ("skipped", "跳过"), + ], + default="pending", + max_length=20, + ), + ), + ("progress", models.PositiveIntegerField(default=0)), + ("message", models.TextField(blank=True, default="")), + ("started_at", models.DateTimeField(blank=True, null=True)), + ("finished_at", 
models.DateTimeField(blank=True, null=True)), + ( + "batch", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="node_runs", + to="review_agent.filesummarybatch", + ), + ), + ], + options={ + "db_table": "ra_workflow_node_run", + }, + ), + migrations.AddIndex( + model_name="fileattachment", + index=models.Index( + fields=["conversation", "created_at"], + name="idx_ra_attachment_conv_created", + ), + ), + migrations.AddIndex( + model_name="fileattachment", + index=models.Index( + fields=["user", "created_at"], name="idx_ra_attachment_user_created" + ), + ), + migrations.AddIndex( + model_name="fileattachment", + index=models.Index( + fields=["conversation", "original_name", "is_active"], + name="idx_ra_attachment_active", + ), + ), + migrations.AddConstraint( + model_name="fileattachment", + constraint=models.UniqueConstraint( + fields=("conversation", "original_name", "version_no"), + name="uq_ra_attachment_conv_name_version", + ), + ), + migrations.AddIndex( + model_name="filesummarybatch", + index=models.Index( + fields=["conversation", "created_at"], name="idx_ra_batch_conv_created" + ), + ), + migrations.AddIndex( + model_name="filesummarybatch", + index=models.Index( + fields=["user", "created_at"], name="idx_ra_batch_user_created" + ), + ), + migrations.AddIndex( + model_name="filesummarybatch", + index=models.Index( + fields=["status", "created_at"], name="idx_ra_batch_status" + ), + ), + migrations.AddIndex( + model_name="exportedsummaryfile", + index=models.Index( + fields=["batch", "export_type"], name="idx_ra_export_batch_type" + ), + ), + migrations.AddIndex( + model_name="exportedsummaryfile", + index=models.Index( + fields=["batch", "created_at"], name="idx_ra_export_batch_created" + ), + ), + migrations.AddIndex( + model_name="filesummarybatchattachment", + index=models.Index( + fields=["batch", "created_at"], name="idx_ra_batch_attachment_batch" + ), + ), + migrations.AddIndex( + 
model_name="filesummarybatchattachment", + index=models.Index(fields=["attachment"], name="idx_ra_batch_attach_file"), + ), + migrations.AddConstraint( + model_name="filesummarybatchattachment", + constraint=models.UniqueConstraint( + fields=("batch", "attachment"), name="uq_ra_batch_attachment" + ), + ), + migrations.AddIndex( + model_name="filesummaryitem", + index=models.Index( + fields=["batch", "file_index"], name="idx_ra_item_batch_index" + ), + ), + migrations.AddIndex( + model_name="filesummaryitem", + index=models.Index( + fields=["batch", "statistics_status"], name="idx_ra_item_batch_status" + ), + ), + migrations.AddIndex( + model_name="filesummaryitem", + index=models.Index( + fields=["batch", "file_type"], name="idx_ra_item_batch_type" + ), + ), + migrations.AddConstraint( + model_name="filesummaryitem", + constraint=models.UniqueConstraint( + fields=("batch", "relative_path"), name="uq_ra_item_batch_relative_path" + ), + ), + migrations.AddIndex( + model_name="workflowevent", + index=models.Index(fields=["batch", "id"], name="idx_ra_event_batch_id"), + ), + migrations.AddIndex( + model_name="workflowevent", + index=models.Index( + fields=["batch", "created_at"], name="idx_ra_event_batch_created" + ), + ), + migrations.AddIndex( + model_name="workflownoderun", + index=models.Index( + fields=["batch", "status"], name="idx_ra_node_batch_status" + ), + ), + migrations.AddConstraint( + model_name="workflownoderun", + constraint=models.UniqueConstraint( + fields=("batch", "node_code"), name="uq_ra_node_batch_code" + ), + ), + ] diff --git a/review_agent/models.py b/review_agent/models.py index 46eba84..a5af82c 100644 --- a/review_agent/models.py +++ b/review_agent/models.py @@ -42,3 +42,293 @@ class Message(models.Model): def __str__(self) -> str: return f"{self.get_role_display()} - {self.conversation_id}" + + +class FileAttachment(models.Model): + """Stores an uploaded file version for one conversation.""" + + class UploadStatus(models.TextChoices): + 
UPLOADED = "uploaded", "已上传" + BOUND = "bound", "已绑定" + DELETED = "deleted", "已删除" + + conversation = models.ForeignKey( + Conversation, + on_delete=models.CASCADE, + related_name="file_attachments", + ) + user = models.ForeignKey( + settings.AUTH_USER_MODEL, + on_delete=models.CASCADE, + related_name="review_file_attachments", + ) + original_name = models.CharField(max_length=255) + version_no = models.PositiveIntegerField(default=1) + is_active = models.BooleanField(default=True) + storage_path = models.CharField(max_length=500) + file_size = models.BigIntegerField(default=0) + content_type = models.CharField(max_length=120, blank=True, default="") + upload_status = models.CharField( + max_length=20, + choices=UploadStatus.choices, + default=UploadStatus.UPLOADED, + ) + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + db_table = "ra_file_attachment" + ordering = ["-created_at", "-id"] + constraints = [ + models.UniqueConstraint( + fields=["conversation", "original_name", "version_no"], + name="uq_ra_attachment_conv_name_version", + ) + ] + indexes = [ + models.Index( + fields=["conversation", "created_at"], + name="idx_ra_attachment_conv_created", + ), + models.Index( + fields=["user", "created_at"], + name="idx_ra_attachment_user_created", + ), + models.Index( + fields=["conversation", "original_name", "is_active"], + name="idx_ra_attachment_active", + ), + ] + + def __str__(self) -> str: + return f"{self.original_name} v{self.version_no}" + + +class FileSummaryBatch(models.Model): + """Tracks one automatic file inventory and page-count workflow run.""" + + class Status(models.TextChoices): + PENDING = "pending", "待执行" + RUNNING = "running", "执行中" + SUCCESS = "success", "成功" + FAILED = "failed", "失败" + + conversation = models.ForeignKey( + Conversation, + on_delete=models.CASCADE, + related_name="file_summary_batches", + ) + user = models.ForeignKey( + settings.AUTH_USER_MODEL, + on_delete=models.CASCADE, + 
related_name="review_file_summary_batches", + ) + trigger_message = models.ForeignKey( + Message, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="triggered_file_summary_batches", + ) + batch_no = models.CharField(max_length=64, unique=True) + product_name = models.CharField(max_length=200, blank=True, default="") + status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING) + total_files = models.IntegerField(default=0) + supported_files = models.IntegerField(default=0) + success_files = models.IntegerField(default=0) + failed_files = models.IntegerField(default=0) + unsupported_files = models.IntegerField(default=0) + uncertain_files = models.IntegerField(default=0) + total_pages = models.IntegerField(default=0) + work_dir = models.CharField(max_length=500, blank=True, default="") + error_message = models.TextField(blank=True, default="") + created_at = models.DateTimeField(auto_now_add=True) + started_at = models.DateTimeField(null=True, blank=True) + finished_at = models.DateTimeField(null=True, blank=True) + + class Meta: + db_table = "ra_file_summary_batch" + ordering = ["-created_at", "-id"] + indexes = [ + models.Index(fields=["conversation", "created_at"], name="idx_ra_batch_conv_created"), + models.Index(fields=["user", "created_at"], name="idx_ra_batch_user_created"), + models.Index(fields=["status", "created_at"], name="idx_ra_batch_status"), + ] + + def __str__(self) -> str: + return self.batch_no + + +class FileSummaryBatchAttachment(models.Model): + """Binds a workflow batch to the exact attachment versions it uses.""" + + class SourceRole(models.TextChoices): + ARCHIVE = "archive", "压缩包" + MULTI_FILE = "multi_file", "多文件" + + batch = models.ForeignKey( + FileSummaryBatch, + on_delete=models.CASCADE, + related_name="batch_attachments", + ) + attachment = models.ForeignKey( + FileAttachment, + on_delete=models.CASCADE, + related_name="batch_bindings", + ) + source_role = models.CharField( + 
max_length=20, + choices=SourceRole.choices, + default=SourceRole.MULTI_FILE, + ) + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + db_table = "ra_file_summary_batch_attachment" + constraints = [ + models.UniqueConstraint( + fields=["batch", "attachment"], + name="uq_ra_batch_attachment", + ) + ] + indexes = [ + models.Index( + fields=["batch", "created_at"], + name="idx_ra_batch_attachment_batch", + ), + models.Index(fields=["attachment"], name="idx_ra_batch_attach_file"), + ] + + +class FileSummaryItem(models.Model): + """Stores one scanned file and its page-count result.""" + + class StatisticsStatus(models.TextChoices): + SUCCESS = "success", "成功" + FAILED = "failed", "失败" + UNSUPPORTED = "unsupported", "不支持" + UNCERTAIN = "uncertain", "不确定" + SKIPPED = "skipped", "跳过" + + batch = models.ForeignKey( + FileSummaryBatch, + on_delete=models.CASCADE, + related_name="items", + ) + file_index = models.PositiveIntegerField() + directory_level = models.CharField(max_length=300, blank=True, default="") + file_name = models.CharField(max_length=255) + file_type = models.CharField(max_length=20) + relative_path = models.CharField(max_length=500) + storage_path = models.CharField(max_length=500) + page_count = models.IntegerField(null=True, blank=True) + statistics_status = models.CharField( + max_length=20, + choices=StatisticsStatus.choices, + default=StatisticsStatus.SKIPPED, + ) + retry_count = models.PositiveIntegerField(default=0) + error_message = models.TextField(blank=True, default="") + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + db_table = "ra_file_summary_item" + ordering = ["file_index", "id"] + constraints = [ + models.UniqueConstraint( + fields=["batch", "relative_path"], + name="uq_ra_item_batch_relative_path", + ) + ] + indexes = [ + models.Index(fields=["batch", "file_index"], name="idx_ra_item_batch_index"), + models.Index(fields=["batch", 
"statistics_status"], name="idx_ra_item_batch_status"), + models.Index(fields=["batch", "file_type"], name="idx_ra_item_batch_type"), + ] + + +class WorkflowNodeRun(models.Model): + """Stores recoverable status for one workflow node.""" + + class Status(models.TextChoices): + PENDING = "pending", "等待中" + RUNNING = "running", "执行中" + RETRYING = "retrying", "重试中" + SUCCESS = "success", "成功" + FAILED = "failed", "失败" + SKIPPED = "skipped", "跳过" + + batch = models.ForeignKey( + FileSummaryBatch, + on_delete=models.CASCADE, + related_name="node_runs", + ) + node_code = models.CharField(max_length=40) + node_name = models.CharField(max_length=80) + status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING) + progress = models.PositiveIntegerField(default=0) + message = models.TextField(blank=True, default="") + started_at = models.DateTimeField(null=True, blank=True) + finished_at = models.DateTimeField(null=True, blank=True) + + class Meta: + db_table = "ra_workflow_node_run" + constraints = [ + models.UniqueConstraint(fields=["batch", "node_code"], name="uq_ra_node_batch_code") + ] + indexes = [ + models.Index(fields=["batch", "status"], name="idx_ra_node_batch_status"), + ] + + +class WorkflowEvent(models.Model): + """Persists workflow events for SSE replay and diagnostics.""" + + batch = models.ForeignKey( + FileSummaryBatch, + on_delete=models.CASCADE, + related_name="events", + ) + event_type = models.CharField(max_length=40) + payload = models.JSONField(default=dict) + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + db_table = "ra_workflow_event" + ordering = ["id"] + indexes = [ + models.Index(fields=["batch", "id"], name="idx_ra_event_batch_id"), + models.Index(fields=["batch", "created_at"], name="idx_ra_event_batch_created"), + ] + + +class ExportedSummaryFile(models.Model): + """Stores generated report files for permission-checked download.""" + + class ExportType(models.TextChoices): + MARKDOWN = 
"markdown", "Markdown" + EXCEL = "excel", "Excel" + + class Status(models.TextChoices): + SUCCESS = "success", "成功" + FAILED = "failed", "失败" + + batch = models.ForeignKey( + FileSummaryBatch, + on_delete=models.CASCADE, + related_name="exports", + ) + export_type = models.CharField(max_length=20, choices=ExportType.choices) + file_name = models.CharField(max_length=255) + storage_path = models.CharField(max_length=500) + status = models.CharField(max_length=20, choices=Status.choices, default=Status.SUCCESS) + error_message = models.TextField(blank=True, default="") + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + db_table = "ra_exported_summary_file" + ordering = ["-created_at", "-id"] + indexes = [ + models.Index(fields=["batch", "export_type"], name="idx_ra_export_batch_type"), + models.Index(fields=["batch", "created_at"], name="idx_ra_export_batch_created"), + ] diff --git a/tests/test_file_summary_models.py b/tests/test_file_summary_models.py new file mode 100644 index 0000000..52ea6d0 --- /dev/null +++ b/tests/test_file_summary_models.py @@ -0,0 +1,113 @@ +import pytest +from django.contrib.auth import get_user_model +from django.db import IntegrityError, transaction + +from review_agent.models import ( + Conversation, + ExportedSummaryFile, + FileAttachment, + FileSummaryBatch, + FileSummaryBatchAttachment, + FileSummaryItem, +) + + +pytestmark = pytest.mark.django_db + + +def create_user(username="u1"): + return get_user_model().objects.create_user(username=username, password="pass") + + +def test_attachment_versions_are_unique_per_conversation_and_name(): + user = create_user() + conversation = Conversation.objects.create(user=user, title="会话") + + first = FileAttachment.objects.create( + conversation=conversation, + user=user, + original_name="资料.docx", + version_no=1, + is_active=False, + storage_path="media/a.docx", + file_size=10, + ) + second = FileAttachment.objects.create( + conversation=conversation, + user=user, + 
original_name="资料.docx", + version_no=2, + storage_path="media/b.docx", + file_size=12, + ) + + assert first.version_no == 1 + assert second.version_no == 2 + + with pytest.raises(IntegrityError), transaction.atomic(): + FileAttachment.objects.create( + conversation=conversation, + user=user, + original_name="资料.docx", + version_no=2, + storage_path="media/c.docx", + file_size=14, + ) + + +def test_batch_attachment_and_item_unique_constraints(): + user = create_user() + conversation = Conversation.objects.create(user=user, title="会话") + attachment = FileAttachment.objects.create( + conversation=conversation, + user=user, + original_name="资料.docx", + storage_path="media/a.docx", + file_size=10, + ) + batch = FileSummaryBatch.objects.create( + conversation=conversation, + user=user, + batch_no="FS-001", + ) + + FileSummaryBatchAttachment.objects.create(batch=batch, attachment=attachment) + with pytest.raises(IntegrityError), transaction.atomic(): + FileSummaryBatchAttachment.objects.create(batch=batch, attachment=attachment) + + FileSummaryItem.objects.create( + batch=batch, + file_index=1, + file_name="资料.docx", + file_type="docx", + relative_path="资料.docx", + storage_path="media/a.docx", + ) + with pytest.raises(IntegrityError), transaction.atomic(): + FileSummaryItem.objects.create( + batch=batch, + file_index=2, + file_name="资料.docx", + file_type="docx", + relative_path="资料.docx", + storage_path="media/a.docx", + ) + + +def test_exported_file_traces_to_user_and_conversation(): + user = create_user() + conversation = Conversation.objects.create(user=user, title="会话") + batch = FileSummaryBatch.objects.create( + conversation=conversation, + user=user, + batch_no="FS-002", + ) + exported = ExportedSummaryFile.objects.create( + batch=batch, + export_type=ExportedSummaryFile.ExportType.MARKDOWN, + file_name="summary.md", + storage_path="media/summary.md", + ) + + assert exported.batch.user == user + assert exported.batch.conversation == conversation From 
eb87d9040d8516ba3e4b2e08ec3b4f9324c79870 Mon Sep 17 00:00:00 2001 From: bruce Date: Sat, 6 Jun 2026 01:13:23 +0800 Subject: [PATCH 2/7] =?UTF-8?q?feat(file-summary):=20=E5=AE=9E=E7=8E=B0?= =?UTF-8?q?=E5=AF=B9=E8=AF=9D=E9=99=84=E4=BB=B6=E4=B8=8A=E4=BC=A0=E6=8E=A5?= =?UTF-8?q?=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config/urls.py | 3 +- review_agent/file_summary/__init__.py | 1 + review_agent/file_summary/constants.py | 4 ++ review_agent/file_summary/storage.py | 88 ++++++++++++++++++++++++++ review_agent/file_summary/views.py | 58 +++++++++++++++++ review_agent/urls.py | 22 +++++++ tests/test_file_summary_storage.py | 48 ++++++++++++++ tests/test_file_summary_views.py | 75 ++++++++++++++++++++++ 8 files changed, 298 insertions(+), 1 deletion(-) create mode 100644 review_agent/file_summary/__init__.py create mode 100644 review_agent/file_summary/constants.py create mode 100644 review_agent/file_summary/storage.py create mode 100644 review_agent/file_summary/views.py create mode 100644 review_agent/urls.py create mode 100644 tests/test_file_summary_storage.py create mode 100644 tests/test_file_summary_views.py diff --git a/config/urls.py b/config/urls.py index ec39f6a..cd123c8 100644 --- a/config/urls.py +++ b/config/urls.py @@ -1,11 +1,12 @@ from django.contrib import admin from django.contrib.auth.views import LoginView, LogoutView, PasswordChangeView -from django.urls import path +from django.urls import include, path from review_agent.views import stream_chat, workspace urlpatterns = [ path("", workspace, name="home"), + path("", include("review_agent.urls")), path("chat/stream/", stream_chat, name="chat_stream"), path( "login/", diff --git a/review_agent/file_summary/__init__.py b/review_agent/file_summary/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/review_agent/file_summary/__init__.py @@ -0,0 +1 @@ + diff --git a/review_agent/file_summary/constants.py 
b/review_agent/file_summary/constants.py new file mode 100644 index 0000000..3421ec9 --- /dev/null +++ b/review_agent/file_summary/constants.py @@ -0,0 +1,4 @@ +from pathlib import Path + + +ATTACHMENT_ROOT = Path("file_summary") / "users" diff --git a/review_agent/file_summary/storage.py b/review_agent/file_summary/storage.py new file mode 100644 index 0000000..7c2a0c7 --- /dev/null +++ b/review_agent/file_summary/storage.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +from pathlib import Path +from uuid import uuid4 + +from django.conf import settings +from django.db import transaction +from django.utils.text import get_valid_filename + +from review_agent.models import Conversation, FileAttachment + +from .constants import ATTACHMENT_ROOT + + +def _safe_original_name(name: str) -> str: + clean = get_valid_filename(Path(name).name) + return clean or f"upload-{uuid4().hex}" + + +def _relative_attachment_path(conversation: Conversation, filename: str, version_no: int) -> Path: + suffix = Path(filename).suffix + stem = Path(filename).stem + stored_name = f"{stem}_v{version_no}_{uuid4().hex[:8]}{suffix}" + return ( + ATTACHMENT_ROOT + / str(conversation.user_id) + / str(conversation.pk) + / "attachments" + / stored_name + ) + + +def _ensure_inside_media_root(path: Path) -> None: + media_root = Path(settings.MEDIA_ROOT).resolve() + resolved = path.resolve() + if media_root != resolved and media_root not in resolved.parents: + raise ValueError("上传路径必须位于 MEDIA_ROOT 内。") + + +@transaction.atomic +def save_uploaded_attachment(*, conversation: Conversation, user, uploaded_file) -> FileAttachment: + """Stores an uploaded file and creates a versioned attachment record.""" + + original_name = _safe_original_name(uploaded_file.name) + latest = ( + FileAttachment.objects.filter(conversation=conversation, original_name=original_name) + .order_by("-version_no") + .first() + ) + version_no = (latest.version_no if latest else 0) + 1 + relative_path = 
_relative_attachment_path(conversation, original_name, version_no) + absolute_path = Path(settings.MEDIA_ROOT) / relative_path + _ensure_inside_media_root(absolute_path) + absolute_path.parent.mkdir(parents=True, exist_ok=True) + + with absolute_path.open("wb") as target: + for chunk in uploaded_file.chunks(): + target.write(chunk) + + FileAttachment.objects.filter( + conversation=conversation, + original_name=original_name, + is_active=True, + ).update(is_active=False) + + return FileAttachment.objects.create( + conversation=conversation, + user=user, + original_name=original_name, + version_no=version_no, + is_active=True, + storage_path=relative_path.as_posix(), + file_size=uploaded_file.size, + content_type=getattr(uploaded_file, "content_type", "") or "", + ) + + +def serialize_attachment(attachment: FileAttachment) -> dict[str, object]: + return { + "id": attachment.pk, + "original_name": attachment.original_name, + "version_no": attachment.version_no, + "is_active": attachment.is_active, + "file_size": attachment.file_size, + "content_type": attachment.content_type, + "upload_status": attachment.upload_status, + "created_at": attachment.created_at.isoformat(), + } diff --git a/review_agent/file_summary/views.py b/review_agent/file_summary/views.py new file mode 100644 index 0000000..1b48924 --- /dev/null +++ b/review_agent/file_summary/views.py @@ -0,0 +1,58 @@ +from django.contrib.auth.decorators import login_required +from django.http import Http404, JsonResponse +from django.views.decorators.http import require_http_methods + +from review_agent.models import Conversation, FileAttachment + +from .storage import save_uploaded_attachment, serialize_attachment + + +def _conversation_for_user(user, conversation_id: int) -> Conversation: + conversation = Conversation.objects.filter(pk=conversation_id, user=user).first() + if not conversation: + raise Http404("对话不存在。") + return conversation + + +@require_http_methods(["POST", "GET"]) +@login_required +def 
attachments(request, conversation_id: int): + conversation = _conversation_for_user(request.user, conversation_id) + + if request.method == "POST": + files = request.FILES.getlist("files") + if not files: + return JsonResponse({"error": "请选择至少一个文件。"}, status=400) + saved = [ + save_uploaded_attachment( + conversation=conversation, + user=request.user, + uploaded_file=uploaded_file, + ) + for uploaded_file in files + ] + return JsonResponse({"attachments": [serialize_attachment(item) for item in saved]}) + + queryset = FileAttachment.objects.filter(conversation=conversation).order_by( + "original_name", + "-version_no", + ) + return JsonResponse({"attachments": [serialize_attachment(item) for item in queryset]}) + + +@require_http_methods(["DELETE"]) +@login_required +def attachment_detail(request, conversation_id: int, attachment_id: int): + conversation = _conversation_for_user(request.user, conversation_id) + attachment = FileAttachment.objects.filter( + pk=attachment_id, + conversation=conversation, + user=request.user, + ).first() + if not attachment: + raise Http404("附件不存在。") + + attachment.upload_status = FileAttachment.UploadStatus.DELETED + attachment.is_active = False + attachment.save(update_fields=["upload_status", "is_active"]) + return JsonResponse({"ok": True, "attachment": serialize_attachment(attachment)}) diff --git a/review_agent/urls.py b/review_agent/urls.py new file mode 100644 index 0000000..272291d --- /dev/null +++ b/review_agent/urls.py @@ -0,0 +1,22 @@ +from django.urls import path + +from .file_summary.views import attachment_detail, attachments + + +urlpatterns = [ + path( + "api/review-agent/conversations/<int:conversation_id>/attachments/", + attachments, + name="file_summary_attachment_upload", + ), + path( + "api/review-agent/conversations/<int:conversation_id>/attachments/", + attachments, + name="file_summary_attachment_list", + ), + path( + "api/review-agent/conversations/<int:conversation_id>/attachments/<int:attachment_id>/", + attachment_detail, + name="file_summary_attachment_detail", + ), +] diff --git
a/tests/test_file_summary_storage.py b/tests/test_file_summary_storage.py new file mode 100644 index 0000000..38220b6 --- /dev/null +++ b/tests/test_file_summary_storage.py @@ -0,0 +1,48 @@ +from django.core.files.uploadedfile import SimpleUploadedFile +import pytest + +from review_agent.file_summary.storage import save_uploaded_attachment +from review_agent.models import Conversation, FileAttachment + + +pytestmark = pytest.mark.django_db + + +def test_save_uploaded_attachment_versions_same_name(settings, tmp_path, django_user_model): + settings.MEDIA_ROOT = tmp_path + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + + first = save_uploaded_attachment( + conversation=conversation, + user=user, + uploaded_file=SimpleUploadedFile("资料.docx", b"first"), + ) + second = save_uploaded_attachment( + conversation=conversation, + user=user, + uploaded_file=SimpleUploadedFile("资料.docx", b"second"), + ) + + first.refresh_from_db() + assert first.version_no == 1 + assert first.is_active is False + assert second.version_no == 2 + assert second.is_active is True + assert FileAttachment.objects.filter(conversation=conversation).count() == 2 + assert (tmp_path / second.storage_path).read_bytes() == b"second" + + +def test_save_uploaded_attachment_rejects_path_traversal(settings, tmp_path, django_user_model): + settings.MEDIA_ROOT = tmp_path + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + + attachment = save_uploaded_attachment( + conversation=conversation, + user=user, + uploaded_file=SimpleUploadedFile("../资料.docx", b"content"), + ) + + assert ".." 
not in attachment.storage_path + assert (tmp_path / attachment.storage_path).exists() diff --git a/tests/test_file_summary_views.py b/tests/test_file_summary_views.py new file mode 100644 index 0000000..bbf8745 --- /dev/null +++ b/tests/test_file_summary_views.py @@ -0,0 +1,75 @@ +from django.core.files.uploadedfile import SimpleUploadedFile +from django.urls import reverse +import pytest + +from review_agent.models import Conversation, FileAttachment + + +pytestmark = pytest.mark.django_db + + +def test_upload_attachments_requires_conversation_owner(client, settings, tmp_path, django_user_model): + settings.MEDIA_ROOT = tmp_path + owner = django_user_model.objects.create_user(username="owner", password="pass") + other = django_user_model.objects.create_user(username="other", password="pass") + conversation = Conversation.objects.create(user=owner, title="会话") + client.force_login(other) + + response = client.post( + reverse("file_summary_attachment_upload", args=[conversation.pk]), + {"files": [SimpleUploadedFile("a.docx", b"a")]}, + ) + + assert response.status_code == 404 + + +def test_attachment_api_requires_login(client, django_user_model): + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + + response = client.get(reverse("file_summary_attachment_list", args=[conversation.pk])) + + assert response.status_code == 302 + + +def test_upload_and_list_current_conversation_attachments(client, settings, tmp_path, django_user_model): + settings.MEDIA_ROOT = tmp_path + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + client.force_login(user) + + upload_response = client.post( + reverse("file_summary_attachment_upload", args=[conversation.pk]), + { + "files": [ + SimpleUploadedFile("a.docx", b"a", content_type="application/docx"), + SimpleUploadedFile("b.zip", b"b", 
content_type="application/zip"), + ] + }, + ) + list_response = client.get(reverse("file_summary_attachment_list", args=[conversation.pk])) + + assert upload_response.status_code == 200 + assert upload_response.json()["attachments"][0]["original_name"] == "a.docx" + assert len(list_response.json()["attachments"]) == 2 + + +def test_delete_attachment_is_logical_and_scoped(client, settings, tmp_path, django_user_model): + settings.MEDIA_ROOT = tmp_path + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + attachment = FileAttachment.objects.create( + conversation=conversation, + user=user, + original_name="a.docx", + storage_path="x/a.docx", + file_size=1, + ) + client.force_login(user) + + response = client.delete(reverse("file_summary_attachment_detail", args=[conversation.pk, attachment.pk])) + + attachment.refresh_from_db() + assert response.status_code == 200 + assert attachment.upload_status == FileAttachment.UploadStatus.DELETED + assert attachment.is_active is False From 51e7c0c007b38cf6cf9ad081f41ff1a66c2ef219 Mon Sep 17 00:00:00 2001 From: bruce Date: Sat, 6 Jun 2026 01:16:22 +0800 Subject: [PATCH 3/7] =?UTF-8?q?feat(file-summary):=20=E6=8E=A5=E5=85=A5?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=B1=87=E6=80=BB=E5=B7=A5=E4=BD=9C=E6=B5=81?= =?UTF-8?q?=E8=A7=A6=E5=8F=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- review_agent/file_summary/events.py | 16 +++ review_agent/file_summary/views.py | 49 +++++++ review_agent/file_summary/workflow.py | 127 ++++++++++++++++++ review_agent/file_summary/workflow_trigger.py | 30 +++++ review_agent/services.py | 49 +++++++ review_agent/urls.py | 12 +- tests/test_file_summary_trigger.py | 32 +++++ tests/test_file_summary_workflow.py | 102 ++++++++++++++ 8 files changed, 416 insertions(+), 1 deletion(-) create mode 100644 review_agent/file_summary/events.py create mode 100644 
review_agent/file_summary/workflow.py create mode 100644 review_agent/file_summary/workflow_trigger.py create mode 100644 tests/test_file_summary_trigger.py create mode 100644 tests/test_file_summary_workflow.py diff --git a/review_agent/file_summary/events.py b/review_agent/file_summary/events.py new file mode 100644 index 0000000..3d9f80c --- /dev/null +++ b/review_agent/file_summary/events.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from review_agent.models import FileSummaryBatch, WorkflowEvent + + +def record_event(batch: FileSummaryBatch, event_type: str, payload: dict | None = None) -> WorkflowEvent: + return WorkflowEvent.objects.create(batch=batch, event_type=event_type, payload=payload or {}) + + +def serialize_event(event: WorkflowEvent) -> dict[str, object]: + return { + "id": event.pk, + "event_type": event.event_type, + "payload": event.payload, + "created_at": event.created_at.isoformat(), + } diff --git a/review_agent/file_summary/views.py b/review_agent/file_summary/views.py index 1b48924..fa4d169 100644 --- a/review_agent/file_summary/views.py +++ b/review_agent/file_summary/views.py @@ -3,6 +3,8 @@ from django.http import Http404, JsonResponse from django.views.decorators.http import require_http_methods from review_agent.models import Conversation, FileAttachment +from review_agent.models import FileSummaryBatch, WorkflowEvent +from .events import serialize_event from .storage import save_uploaded_attachment, serialize_attachment @@ -56,3 +58,50 @@ def attachment_detail(request, conversation_id: int, attachment_id: int): attachment.is_active = False attachment.save(update_fields=["upload_status", "is_active"]) return JsonResponse({"ok": True, "attachment": serialize_attachment(attachment)}) + + +@require_http_methods(["GET"]) +@login_required +def batch_status(request, batch_id: int): + batch = FileSummaryBatch.objects.filter(pk=batch_id, user=request.user).first() + if not batch: + raise Http404("批次不存在。") + return JsonResponse( + 
{ + "batch": { + "id": batch.pk, + "batch_no": batch.batch_no, + "status": batch.status, + "product_name": batch.product_name, + "total_files": batch.total_files, + "success_files": batch.success_files, + "failed_files": batch.failed_files, + "total_pages": batch.total_pages, + }, + "nodes": [ + { + "node_code": node.node_code, + "node_name": node.node_name, + "status": node.status, + "progress": node.progress, + "message": node.message, + } + for node in batch.node_runs.order_by("id") + ], + } + ) + + +@require_http_methods(["GET"]) +@login_required +def batch_events(request, batch_id: int): + batch = FileSummaryBatch.objects.filter(pk=batch_id, user=request.user).first() + if not batch: + raise Http404("批次不存在。") + after = request.GET.get("after") or "0" + try: + after_id = int(after) + except ValueError: + after_id = 0 + events = WorkflowEvent.objects.filter(batch=batch, pk__gt=after_id).order_by("id") + return JsonResponse({"events": [serialize_event(event) for event in events]}) diff --git a/review_agent/file_summary/workflow.py b/review_agent/file_summary/workflow.py new file mode 100644 index 0000000..9316350 --- /dev/null +++ b/review_agent/file_summary/workflow.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +from threading import Thread +from uuid import uuid4 + +from django.db import transaction +from django.utils import timezone + +from review_agent.models import ( + Conversation, + FileAttachment, + FileSummaryBatch, + FileSummaryBatchAttachment, + Message, + WorkflowNodeRun, +) + +from .events import record_event + + +NODE_DEFINITIONS = [ + ("upload", "附件固化"), + ("extract", "压缩包解压"), + ("inventory", "文件扫描"), + ("page_count", "页数统计"), + ("product_detect", "产品识别"), + ("report", "报告输出"), + ("complete", "完成"), +] + + +def build_batch_no() -> str: + return f"FS-{timezone.localtime().strftime('%Y%m%d%H%M%S')}-{uuid4().hex[:6]}" + + +@transaction.atomic +def create_file_summary_batch( + *, + conversation: Conversation, + user, + trigger_message: 
Message | None = None, +) -> FileSummaryBatch: + active_attachments = list( + FileAttachment.objects.select_for_update() + .filter(conversation=conversation, is_active=True) + .exclude(upload_status=FileAttachment.UploadStatus.DELETED) + .order_by("original_name", "-created_at") + ) + if not active_attachments: + raise ValueError("当前对话没有可用附件。") + + batch = FileSummaryBatch.objects.create( + conversation=conversation, + user=user, + trigger_message=trigger_message, + batch_no=build_batch_no(), + ) + + for attachment in active_attachments: + FileSummaryBatchAttachment.objects.create(batch=batch, attachment=attachment) + attachment.upload_status = FileAttachment.UploadStatus.BOUND + attachment.save(update_fields=["upload_status"]) + + for code, name in NODE_DEFINITIONS: + WorkflowNodeRun.objects.create(batch=batch, node_code=code, node_name=name) + + record_event(batch, "workflow_created", {"batch_id": batch.pk, "batch_no": batch.batch_no}) + return batch + + +class WorkflowExecutor: + def __init__(self, batch: FileSummaryBatch): + self.batch = batch + + def run(self) -> None: + self.batch.status = FileSummaryBatch.Status.RUNNING + self.batch.started_at = timezone.now() + self.batch.save(update_fields=["status", "started_at"]) + record_event(self.batch, "workflow_started", {"batch_id": self.batch.pk}) + + try: + for node in self.batch.node_runs.order_by("id"): + self._run_node(node) + except Exception as exc: + self.batch.status = FileSummaryBatch.Status.FAILED + self.batch.error_message = str(exc) + self.batch.finished_at = timezone.now() + self.batch.save(update_fields=["status", "error_message", "finished_at"]) + record_event(self.batch, "workflow_failed", {"message": str(exc)}) + return + + self.batch.status = FileSummaryBatch.Status.SUCCESS + self.batch.finished_at = timezone.now() + self.batch.save(update_fields=["status", "finished_at"]) + record_event(self.batch, "workflow_completed", {"batch_id": self.batch.pk}) + + def _run_node(self, node: WorkflowNodeRun) 
-> None: + now = timezone.now() + node.status = WorkflowNodeRun.Status.RUNNING + node.progress = 10 + node.started_at = now + node.message = f"{node.node_name}处理中" + node.save(update_fields=["status", "progress", "started_at", "message"]) + record_event( + self.batch, + "node_progress", + {"node_code": node.node_code, "status": node.status, "progress": node.progress}, + ) + + node.status = WorkflowNodeRun.Status.SUCCESS + node.progress = 100 + node.finished_at = timezone.now() + node.message = f"{node.node_name}完成" + node.save(update_fields=["status", "progress", "finished_at", "message"]) + record_event( + self.batch, + "node_progress", + {"node_code": node.node_code, "status": node.status, "progress": node.progress}, + ) + + +def start_file_summary_workflow(batch: FileSummaryBatch, *, async_run: bool = True) -> None: + executor = WorkflowExecutor(batch) + if not async_run: + executor.run() + return + Thread(target=executor.run, daemon=True).start() diff --git a/review_agent/file_summary/workflow_trigger.py b/review_agent/file_summary/workflow_trigger.py new file mode 100644 index 0000000..ff86c41 --- /dev/null +++ b/review_agent/file_summary/workflow_trigger.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from dataclasses import dataclass + +from review_agent.models import Conversation, FileAttachment + + +TRIGGER_KEYWORDS = ("自动汇总", "文件目录", "页数", "目录与页数", "文件清单") + + +@dataclass(frozen=True) +class TriggerResult: + should_start: bool + workflow_type: str = "" + reason: str = "" + + +def evaluate_file_summary_trigger(conversation: Conversation, content: str) -> TriggerResult: + text = (content or "").strip() + if not any(keyword in text for keyword in TRIGGER_KEYWORDS): + return TriggerResult(should_start=False, reason="not_matched") + + has_attachment = FileAttachment.objects.filter( + conversation=conversation, + is_active=True, + ).exclude(upload_status=FileAttachment.UploadStatus.DELETED).exists() + if not has_attachment: + return 
TriggerResult(should_start=False, reason="missing_attachment") + + return TriggerResult(should_start=True, workflow_type="file_summary") diff --git a/review_agent/services.py b/review_agent/services.py index 43a3a2f..c4b352b 100644 --- a/review_agent/services.py +++ b/review_agent/services.py @@ -3,8 +3,11 @@ from __future__ import annotations import json from django.db.models import Q, QuerySet +from django.conf import settings from django.utils import timezone +from .file_summary.workflow import create_file_summary_batch, start_file_summary_workflow +from .file_summary.workflow_trigger import evaluate_file_summary_trigger from .llm import LLMConfigurationError, LLMRequestError, generate_reply, stream_reply from .models import Conversation, Message @@ -88,6 +91,7 @@ def stream_message(conversation: Conversation, content: str): user_message = append_user_message(conversation, content) assistant_parts: list[str] = [] + trigger = evaluate_file_summary_trigger(conversation, content) yield sse_event( "meta", @@ -99,6 +103,51 @@ def stream_message(conversation: Conversation, content: str): }, ) + if trigger.reason == "missing_attachment": + reply_content = "请先在当前对话右侧上传需要汇总的文件或压缩包,然后再发送自动汇总指令。" + assistant_message = append_assistant_message(conversation, reply_content) + yield sse_event("chunk", {"delta": reply_content}) + yield sse_event( + "done", + { + "assistant_message_id": assistant_message.pk, + "conversation_id": conversation.pk, + "title": conversation.title, + }, + ) + return + + if trigger.should_start: + batch = create_file_summary_batch( + conversation=conversation, + user=conversation.user, + trigger_message=user_message, + ) + start_file_summary_workflow( + batch, + async_run=getattr(settings, "FILE_SUMMARY_ASYNC", True), + ) + reply_content = f"已启动文件目录与页数自动汇总工作流,批次号:{batch.batch_no}。" + assistant_message = append_assistant_message(conversation, reply_content) + yield sse_event( + "workflow_started", + { + "workflow_type": "file_summary", + "batch_id": 
batch.pk, + "batch_no": batch.batch_no, + }, + ) + yield sse_event("chunk", {"delta": reply_content}) + yield sse_event( + "done", + { + "assistant_message_id": assistant_message.pk, + "conversation_id": conversation.pk, + "title": conversation.title, + }, + ) + return + try: for chunk in stream_reply(conversation, content): assistant_parts.append(chunk) diff --git a/review_agent/urls.py b/review_agent/urls.py index 272291d..5f6fac3 100644 --- a/review_agent/urls.py +++ b/review_agent/urls.py @@ -1,6 +1,6 @@ from django.urls import path -from .file_summary.views import attachment_detail, attachments +from .file_summary.views import attachment_detail, attachments, batch_events, batch_status urlpatterns = [ @@ -19,4 +19,14 @@ urlpatterns = [ attachment_detail, name="file_summary_attachment_detail", ), + path( + "api/review-agent/file-summary/<int:batch_id>/status/", + batch_status, + name="file_summary_batch_status", + ), + path( + "api/review-agent/file-summary/<int:batch_id>/events/", + batch_events, + name="file_summary_batch_events", + ), ] diff --git a/tests/test_file_summary_trigger.py b/tests/test_file_summary_trigger.py new file mode 100644 index 0000000..4d94164 --- /dev/null +++ b/tests/test_file_summary_trigger.py @@ -0,0 +1,32 @@ +import pytest + +from review_agent.file_summary.workflow_trigger import evaluate_file_summary_trigger +from review_agent.models import Conversation, FileAttachment + + +pytestmark = pytest.mark.django_db + + +def test_trigger_matches_keywords_only_when_active_attachment_exists(django_user_model): + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + + no_file = evaluate_file_summary_trigger(conversation, "请自动汇总文件目录与页数") + assert no_file.should_start is False + assert no_file.reason == "missing_attachment" + + FileAttachment.objects.create( + conversation=conversation, + user=user, + original_name="a.docx", + storage_path="x/a.docx", + file_size=1, + ) + + 
matched = evaluate_file_summary_trigger(conversation, "请自动汇总文件目录与页数") + assert matched.should_start is True + assert matched.workflow_type == "file_summary" + + normal = evaluate_file_summary_trigger(conversation, "你好,帮我解释法规") + assert normal.should_start is False + assert normal.reason == "not_matched" diff --git a/tests/test_file_summary_workflow.py b/tests/test_file_summary_workflow.py new file mode 100644 index 0000000..ea50817 --- /dev/null +++ b/tests/test_file_summary_workflow.py @@ -0,0 +1,102 @@ +import pytest + +from review_agent.file_summary.workflow import create_file_summary_batch, start_file_summary_workflow +from review_agent.models import ( + Conversation, + FileAttachment, + FileSummaryBatch, + FileSummaryBatchAttachment, + Message, + WorkflowEvent, + WorkflowNodeRun, +) +from review_agent.services import stream_message + + +pytestmark = pytest.mark.django_db + + +def test_create_batch_binds_active_attachments_and_initializes_nodes(django_user_model): + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + message = Message.objects.create(conversation=conversation, role=Message.Role.USER, content="自动汇总") + active = FileAttachment.objects.create( + conversation=conversation, + user=user, + original_name="a.docx", + storage_path="x/a.docx", + file_size=1, + ) + FileAttachment.objects.create( + conversation=conversation, + user=user, + original_name="old.docx", + is_active=False, + storage_path="x/old.docx", + file_size=1, + ) + + batch = create_file_summary_batch(conversation=conversation, user=user, trigger_message=message) + + assert batch.status == FileSummaryBatch.Status.PENDING + assert FileSummaryBatchAttachment.objects.get(batch=batch).attachment == active + active.refresh_from_db() + assert active.upload_status == FileAttachment.UploadStatus.BOUND + assert WorkflowNodeRun.objects.filter(batch=batch).count() >= 6 + assert 
WorkflowEvent.objects.filter(batch=batch, event_type="workflow_created").exists() + + +def test_start_file_summary_workflow_runs_synchronously_for_tests(django_user_model): + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + message = Message.objects.create(conversation=conversation, role=Message.Role.USER, content="自动汇总") + FileAttachment.objects.create( + conversation=conversation, + user=user, + original_name="a.docx", + storage_path="x/a.docx", + file_size=1, + ) + batch = create_file_summary_batch(conversation=conversation, user=user, trigger_message=message) + + start_file_summary_workflow(batch, async_run=False) + + batch.refresh_from_db() + assert batch.status == FileSummaryBatch.Status.SUCCESS + assert WorkflowEvent.objects.filter(batch=batch, event_type="workflow_completed").exists() + + +def test_stream_message_returns_workflow_meta_when_triggered(settings, django_user_model): + settings.FILE_SUMMARY_ASYNC = False + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + FileAttachment.objects.create( + conversation=conversation, + user=user, + original_name="a.docx", + storage_path="x/a.docx", + file_size=1, + ) + + frames = list(stream_message(conversation, "请自动汇总文件目录与页数")) + + joined = "".join(frames) + assert "workflow_started" in joined + assert "\"workflow_type\": \"file_summary\"" in joined + assert FileSummaryBatch.objects.filter(conversation=conversation).exists() + + +def test_stream_message_uses_normal_llm_path_when_not_triggered(monkeypatch, django_user_model): + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + + def fake_stream_reply(conversation, content): + yield "普通回复" + + monkeypatch.setattr("review_agent.services.stream_reply", fake_stream_reply) 
+ + frames = list(stream_message(conversation, "你好")) + + joined = "".join(frames) + assert "普通回复" in joined + assert "workflow_started" not in joined From 18d045d4874c9c95c186fc2a970064f0f2aba333 Mon Sep 17 00:00:00 2001 From: bruce Date: Sat, 6 Jun 2026 01:20:26 +0800 Subject: [PATCH 4/7] =?UTF-8?q?feat(file-summary):=20=E5=AE=9E=E7=8E=B0?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E5=A4=84=E7=90=86=E6=8A=80=E8=83=BD=E9=93=BE?= =?UTF-8?q?=E8=B7=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- review_agent/file_summary/paths.py | 12 +++ .../file_summary/services/__init__.py | 1 + review_agent/file_summary/services/archive.py | 77 +++++++++++++++++++ .../file_summary/services/inventory.py | 49 ++++++++++++ .../file_summary/services/page_count.py | 59 ++++++++++++++ .../file_summary/services/product_detect.py | 31 ++++++++ review_agent/file_summary/skills/__init__.py | 1 + .../file_summary/skills/archive_extract.py | 26 +++++++ review_agent/file_summary/skills/base.py | 24 ++++++ .../skills/document_page_count.py | 64 +++++++++++++++ .../file_summary/skills/file_inventory.py | 21 +++++ .../file_summary/skills/product_detect.py | 12 +++ review_agent/file_summary/skills/registry.py | 22 ++++++ review_agent/file_summary/workflow.py | 43 ++++++++--- tests/test_file_summary_archive.py | 25 ++++++ tests/test_file_summary_inventory.py | 24 ++++++ tests/test_file_summary_page_count.py | 66 ++++++++++++++++ tests/test_file_summary_product_detect.py | 29 +++++++ tests/test_file_summary_skills.py | 27 +++++++ 19 files changed, 604 insertions(+), 9 deletions(-) create mode 100644 review_agent/file_summary/paths.py create mode 100644 review_agent/file_summary/services/__init__.py create mode 100644 review_agent/file_summary/services/archive.py create mode 100644 review_agent/file_summary/services/inventory.py create mode 100644 review_agent/file_summary/services/page_count.py create mode 100644 
review_agent/file_summary/services/product_detect.py create mode 100644 review_agent/file_summary/skills/__init__.py create mode 100644 review_agent/file_summary/skills/archive_extract.py create mode 100644 review_agent/file_summary/skills/base.py create mode 100644 review_agent/file_summary/skills/document_page_count.py create mode 100644 review_agent/file_summary/skills/file_inventory.py create mode 100644 review_agent/file_summary/skills/product_detect.py create mode 100644 review_agent/file_summary/skills/registry.py create mode 100644 tests/test_file_summary_archive.py create mode 100644 tests/test_file_summary_inventory.py create mode 100644 tests/test_file_summary_page_count.py create mode 100644 tests/test_file_summary_product_detect.py create mode 100644 tests/test_file_summary_skills.py diff --git a/review_agent/file_summary/paths.py b/review_agent/file_summary/paths.py new file mode 100644 index 0000000..8735825 --- /dev/null +++ b/review_agent/file_summary/paths.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from pathlib import Path + +from django.conf import settings + + +def resolve_storage_path(storage_path: str) -> Path: + path = Path(storage_path) + if path.is_absolute(): + return path + return Path(settings.MEDIA_ROOT) / path diff --git a/review_agent/file_summary/services/__init__.py b/review_agent/file_summary/services/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/review_agent/file_summary/services/__init__.py @@ -0,0 +1 @@ + diff --git a/review_agent/file_summary/services/archive.py b/review_agent/file_summary/services/archive.py new file mode 100644 index 0000000..9e554e8 --- /dev/null +++ b/review_agent/file_summary/services/archive.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +import subprocess +from pathlib import Path +from zipfile import ZipFile + +import py7zr + + +ARCHIVE_EXTENSIONS = {"zip", "7z", "rar"} + + +def _ensure_inside_target(path: Path, target_dir: Path) -> None: + target 
= target_dir.resolve() + resolved = path.resolve() + if target != resolved and target not in resolved.parents: + raise ValueError("解压路径必须位于批次工作目录内。") + + +def _safe_member_path(target_dir: Path, member_name: str) -> Path: + destination = target_dir / member_name + _ensure_inside_target(destination, target_dir) + return destination + + +def extract_archive(archive_path: str | Path, target_dir: str | Path) -> list[Path]: + archive_path = Path(archive_path) + target_dir = Path(target_dir) + target_dir.mkdir(parents=True, exist_ok=True) + ext = archive_path.suffix.lower().lstrip(".") + if ext not in ARCHIVE_EXTENSIONS: + return [] + + if ext == "zip": + return _extract_zip(archive_path, target_dir) + if ext == "7z": + return _extract_7z(archive_path, target_dir) + return _extract_rar(archive_path, target_dir) + + +def _extract_zip(archive_path: Path, target_dir: Path) -> list[Path]: + extracted: list[Path] = [] + with ZipFile(archive_path) as archive: + for member in archive.infolist(): + destination = _safe_member_path(target_dir, member.filename) + if member.is_dir(): + destination.mkdir(parents=True, exist_ok=True) + continue + destination.parent.mkdir(parents=True, exist_ok=True) + with archive.open(member) as source, destination.open("wb") as target: + target.write(source.read()) + extracted.append(destination) + return extracted + + +def _extract_7z(archive_path: Path, target_dir: Path) -> list[Path]: + with py7zr.SevenZipFile(archive_path, mode="r") as archive: + names = archive.getnames() + for name in names: + _safe_member_path(target_dir, name) + archive.extractall(path=target_dir) + return [target_dir / name for name in names if (target_dir / name).is_file()] + + +def _extract_rar(archive_path: Path, target_dir: Path) -> list[Path]: + result = subprocess.run( + ["7z", "x", f"-o{target_dir}", str(archive_path), "-y"], + check=False, + capture_output=True, + text=True, + ) + if result.returncode != 0: + raise RuntimeError(result.stderr or result.stdout or "rar 
解压失败") + extracted = [path for path in target_dir.rglob("*") if path.is_file()] + for path in extracted: + _ensure_inside_target(path, target_dir) + return extracted diff --git a/review_agent/file_summary/services/inventory.py b/review_agent/file_summary/services/inventory.py new file mode 100644 index 0000000..e7282db --- /dev/null +++ b/review_agent/file_summary/services/inventory.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from pathlib import Path + +from review_agent.models import FileSummaryBatch, FileSummaryItem + + +SUPPORTED_EXTENSIONS = {"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx"} + + +def _directory_level(relative_path: Path) -> str: + if len(relative_path.parts) <= 1: + return "" + return "/".join(relative_path.parts[:-1]) + + +def scan_files_to_items(*, batch: FileSummaryBatch, roots: list[Path]) -> list[FileSummaryItem]: + files: list[tuple[Path, Path]] = [] + for root in roots: + root = Path(root) + if root.is_file(): + files.append((root.parent, root)) + continue + for path in sorted(item for item in root.rglob("*") if item.is_file()): + if path.name.startswith(".") or path.stat().st_size == 0: + continue + files.append((root, path)) + + created: list[FileSummaryItem] = [] + for index, (root, path) in enumerate(files, start=1): + relative = path.relative_to(root).as_posix() + file_type = path.suffix.lower().lstrip(".") + item = FileSummaryItem.objects.create( + batch=batch, + file_index=index, + directory_level=_directory_level(Path(relative)), + file_name=path.name, + file_type=file_type, + relative_path=relative, + storage_path=str(path), + statistics_status=FileSummaryItem.StatisticsStatus.SKIPPED, + ) + created.append(item) + + batch.total_files = len(created) + batch.supported_files = sum(1 for item in created if item.file_type in SUPPORTED_EXTENSIONS) + batch.unsupported_files = len(created) - batch.supported_files + batch.save(update_fields=["total_files", "supported_files", "unsupported_files"]) + return created diff 
--git a/review_agent/file_summary/services/page_count.py b/review_agent/file_summary/services/page_count.py new file mode 100644 index 0000000..3a90b9b --- /dev/null +++ b/review_agent/file_summary/services/page_count.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + + +SUPPORTED_EXTENSIONS = {"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx"} + + +@dataclass(frozen=True) +class PageCountResult: + status: str + page_count: int | None = None + error_message: str = "" + + +def count_document_pages(path: str | Path) -> PageCountResult: + file_path = Path(path) + ext = file_path.suffix.lower().lstrip(".") + if ext not in SUPPORTED_EXTENSIONS: + return PageCountResult(status="unsupported") + + try: + if ext == "pdf": + from pypdf import PdfReader + + return PageCountResult(status="success", page_count=len(PdfReader(str(file_path)).pages)) + if ext == "docx": + from docx import Document + + properties = Document(str(file_path)).core_properties + pages = getattr(properties, "pages", None) + if pages: + return PageCountResult(status="success", page_count=pages) + return PageCountResult(status="uncertain") + if ext == "xlsx": + from openpyxl import load_workbook + + workbook = load_workbook(str(file_path), read_only=True, data_only=True) + return PageCountResult(status="success", page_count=len(workbook.sheetnames)) + if ext == "xls": + import xlrd + + workbook = xlrd.open_workbook(str(file_path), on_demand=True) + return PageCountResult(status="success", page_count=workbook.nsheets) + if ext == "pptx": + from pptx import Presentation + + return PageCountResult(status="success", page_count=len(Presentation(str(file_path)).slides)) + if ext in {"doc", "ppt"}: + import olefile + + if olefile.isOleFile(str(file_path)): + return PageCountResult(status="uncertain") + return PageCountResult(status="failed", error_message="不是有效的 OLE 文件。") + except Exception as exc: + return PageCountResult(status="failed", 
error_message=str(exc)) + + return PageCountResult(status="uncertain") diff --git a/review_agent/file_summary/services/product_detect.py b/review_agent/file_summary/services/product_detect.py new file mode 100644 index 0000000..ff48dba --- /dev/null +++ b/review_agent/file_summary/services/product_detect.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from pathlib import Path + +from review_agent.models import FileSummaryBatch + + +def detect_product_name(batch: FileSummaryBatch) -> str: + product_name = "" + for item in batch.items.order_by("file_index"): + parts = Path(item.relative_path).parts + if len(parts) > 1: + product_name = parts[0] + break + name = Path(item.file_name).stem + for keyword in ("产品", "试剂盒", "说明书"): + if keyword in name: + product_name = name + break + if product_name: + break + + if not product_name: + return "" + + batch.product_name = product_name + batch.save(update_fields=["product_name"]) + if batch.conversation.title.startswith("新对话"): + batch.conversation.title = f"{product_name}-文件汇总" + batch.conversation.save(update_fields=["title", "updated_at"]) + return product_name diff --git a/review_agent/file_summary/skills/__init__.py b/review_agent/file_summary/skills/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/review_agent/file_summary/skills/__init__.py @@ -0,0 +1 @@ + diff --git a/review_agent/file_summary/skills/archive_extract.py b/review_agent/file_summary/skills/archive_extract.py new file mode 100644 index 0000000..83487b8 --- /dev/null +++ b/review_agent/file_summary/skills/archive_extract.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from pathlib import Path + +from review_agent.models import FileSummaryBatchAttachment + +from ..paths import resolve_storage_path +from ..services.archive import ARCHIVE_EXTENSIONS, extract_archive +from .base import BaseSkill, SkillResult, WorkflowContext + + +class ArchiveExtractSkill(BaseSkill): + name = "archive_extract" + + def run(self, 
context: WorkflowContext) -> SkillResult: + extracted_count = 0 + target_dir = Path(context.batch.work_dir or "") + if not target_dir: + return SkillResult(success=True, data={"extracted_count": 0}) + + for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch): + path = resolve_storage_path(binding.attachment.storage_path) + if path.suffix.lower().lstrip(".") not in ARCHIVE_EXTENSIONS: + continue + extracted_count += len(extract_archive(path, target_dir)) + return SkillResult(success=True, data={"extracted_count": extracted_count}) diff --git a/review_agent/file_summary/skills/base.py b/review_agent/file_summary/skills/base.py new file mode 100644 index 0000000..b8e6313 --- /dev/null +++ b/review_agent/file_summary/skills/base.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from dataclasses import dataclass, field + +from review_agent.models import FileSummaryBatch + + +@dataclass(frozen=True) +class WorkflowContext: + batch: FileSummaryBatch + + +@dataclass +class SkillResult: + success: bool + data: dict = field(default_factory=dict) + message: str = "" + + +class BaseSkill: + name = "" + + def run(self, context: WorkflowContext) -> SkillResult: + raise NotImplementedError diff --git a/review_agent/file_summary/skills/document_page_count.py b/review_agent/file_summary/skills/document_page_count.py new file mode 100644 index 0000000..f53ad77 --- /dev/null +++ b/review_agent/file_summary/skills/document_page_count.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +from review_agent.models import FileSummaryItem + +from ..services.page_count import SUPPORTED_EXTENSIONS, count_document_pages +from .base import BaseSkill, SkillResult, WorkflowContext + + +class DocumentPageCountSkill(BaseSkill): + name = "document_page_count" + + def run(self, context: WorkflowContext) -> SkillResult: + success_files = failed_files = unsupported_files = uncertain_files = total_pages = 0 + for item in context.batch.items.order_by("file_index"): + 
if item.file_type not in SUPPORTED_EXTENSIONS: + item.statistics_status = FileSummaryItem.StatisticsStatus.UNSUPPORTED + unsupported_files += 1 + item.save(update_fields=["statistics_status", "updated_at"]) + continue + + result = None + for attempt in range(1, 4): + result = count_document_pages(item.storage_path) + item.retry_count = attempt - 1 + if result.status != "failed": + break + item.statistics_status = result.status + item.page_count = result.page_count + item.error_message = result.error_message + item.save( + update_fields=[ + "statistics_status", + "page_count", + "retry_count", + "error_message", + "updated_at", + ] + ) + + if result.status == FileSummaryItem.StatisticsStatus.SUCCESS: + success_files += 1 + total_pages += result.page_count or 0 + elif result.status == FileSummaryItem.StatisticsStatus.UNCERTAIN: + uncertain_files += 1 + elif result.status == FileSummaryItem.StatisticsStatus.UNSUPPORTED: + unsupported_files += 1 + else: + failed_files += 1 + + context.batch.success_files = success_files + context.batch.failed_files = failed_files + context.batch.unsupported_files = unsupported_files + context.batch.uncertain_files = uncertain_files + context.batch.total_pages = total_pages + context.batch.save( + update_fields=[ + "success_files", + "failed_files", + "unsupported_files", + "uncertain_files", + "total_pages", + ] + ) + return SkillResult(success=True) diff --git a/review_agent/file_summary/skills/file_inventory.py b/review_agent/file_summary/skills/file_inventory.py new file mode 100644 index 0000000..75a94dc --- /dev/null +++ b/review_agent/file_summary/skills/file_inventory.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from pathlib import Path + +from review_agent.models import FileSummaryBatchAttachment + +from ..paths import resolve_storage_path +from ..services.inventory import scan_files_to_items +from .base import BaseSkill, SkillResult, WorkflowContext + + +class FileInventorySkill(BaseSkill): + name = 
"file_inventory" + + def run(self, context: WorkflowContext) -> SkillResult: + roots = [ + resolve_storage_path(binding.attachment.storage_path) + for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch) + ] + items = scan_files_to_items(batch=context.batch, roots=roots) + return SkillResult(success=True, data={"total_files": len(items)}) diff --git a/review_agent/file_summary/skills/product_detect.py b/review_agent/file_summary/skills/product_detect.py new file mode 100644 index 0000000..cf86b63 --- /dev/null +++ b/review_agent/file_summary/skills/product_detect.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from ..services.product_detect import detect_product_name +from .base import BaseSkill, SkillResult, WorkflowContext + + +class ProductDetectSkill(BaseSkill): + name = "product_detect" + + def run(self, context: WorkflowContext) -> SkillResult: + product_name = detect_product_name(context.batch) + return SkillResult(success=True, data={"product_name": product_name}) diff --git a/review_agent/file_summary/skills/registry.py b/review_agent/file_summary/skills/registry.py new file mode 100644 index 0000000..9dde1e7 --- /dev/null +++ b/review_agent/file_summary/skills/registry.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from .base import BaseSkill, SkillResult, WorkflowContext + + +class SkillRegistry: + def __init__(self): + self._skills: dict[str, BaseSkill] = {} + + def register(self, skill: BaseSkill) -> None: + if not skill.name: + raise ValueError("Skill 必须声明 name。") + self._skills[skill.name] = skill + + def get(self, name: str) -> BaseSkill: + try: + return self._skills[name] + except KeyError as exc: + raise KeyError(f"Skill 未注册:{name}") from exc + + def execute(self, name: str, context: WorkflowContext) -> SkillResult: + return self.get(name).run(context) diff --git a/review_agent/file_summary/workflow.py b/review_agent/file_summary/workflow.py index 9316350..65b517f 100644 --- 
a/review_agent/file_summary/workflow.py +++ b/review_agent/file_summary/workflow.py @@ -16,19 +16,34 @@ from review_agent.models import ( ) from .events import record_event +from .skills.archive_extract import ArchiveExtractSkill +from .skills.base import WorkflowContext +from .skills.document_page_count import DocumentPageCountSkill +from .skills.file_inventory import FileInventorySkill +from .skills.product_detect import ProductDetectSkill +from .skills.registry import SkillRegistry NODE_DEFINITIONS = [ - ("upload", "附件固化"), - ("extract", "压缩包解压"), - ("inventory", "文件扫描"), - ("page_count", "页数统计"), - ("product_detect", "产品识别"), - ("report", "报告输出"), - ("complete", "完成"), + ("upload", "附件固化", ""), + ("extract", "压缩包解压", "archive_extract"), + ("inventory", "文件扫描", "file_inventory"), + ("page_count", "页数统计", "document_page_count"), + ("product_detect", "产品识别", "product_detect"), + ("report", "报告输出", ""), + ("complete", "完成", ""), ] +def default_skill_registry() -> SkillRegistry: + registry = SkillRegistry() + registry.register(ArchiveExtractSkill()) + registry.register(FileInventorySkill()) + registry.register(DocumentPageCountSkill()) + registry.register(ProductDetectSkill()) + return registry + + def build_batch_no() -> str: return f"FS-{timezone.localtime().strftime('%Y%m%d%H%M%S')}-{uuid4().hex[:6]}" @@ -61,7 +76,7 @@ def create_file_summary_batch( attachment.upload_status = FileAttachment.UploadStatus.BOUND attachment.save(update_fields=["upload_status"]) - for code, name in NODE_DEFINITIONS: + for code, name, _skill_name in NODE_DEFINITIONS: WorkflowNodeRun.objects.create(batch=batch, node_code=code, node_name=name) record_event(batch, "workflow_created", {"batch_id": batch.pk, "batch_no": batch.batch_no}) @@ -69,8 +84,9 @@ def create_file_summary_batch( class WorkflowExecutor: - def __init__(self, batch: FileSummaryBatch): + def __init__(self, batch: FileSummaryBatch, registry: SkillRegistry | None = None): self.batch = batch + self.registry = registry or 
default_skill_registry() def run(self) -> None: self.batch.status = FileSummaryBatch.Status.RUNNING @@ -107,6 +123,15 @@ class WorkflowExecutor: {"node_code": node.node_code, "status": node.status, "progress": node.progress}, ) + skill_name = next( + (skill for code, _name, skill in NODE_DEFINITIONS if code == node.node_code), + "", + ) + if skill_name: + result = self.registry.execute(skill_name, WorkflowContext(batch=self.batch)) + if not result.success: + raise RuntimeError(result.message or f"{node.node_name}执行失败") + node.status = WorkflowNodeRun.Status.SUCCESS node.progress = 100 node.finished_at = timezone.now() diff --git a/tests/test_file_summary_archive.py b/tests/test_file_summary_archive.py new file mode 100644 index 0000000..29a1a80 --- /dev/null +++ b/tests/test_file_summary_archive.py @@ -0,0 +1,25 @@ +from zipfile import ZipFile +import pytest + +from review_agent.file_summary.services.archive import extract_archive + + +def test_extract_zip_preserves_safe_paths(tmp_path): + archive_path = tmp_path / "safe.zip" + with ZipFile(archive_path, "w") as archive: + archive.writestr("dir/a.txt", "content") + + target = tmp_path / "out" + extracted = extract_archive(archive_path, target) + + assert extracted == [target / "dir" / "a.txt"] + assert (target / "dir" / "a.txt").read_text(encoding="utf-8") == "content" + + +def test_extract_zip_rejects_path_traversal(tmp_path): + archive_path = tmp_path / "evil.zip" + with ZipFile(archive_path, "w") as archive: + archive.writestr("../evil.txt", "bad") + + with pytest.raises(ValueError): + extract_archive(archive_path, tmp_path / "out") diff --git a/tests/test_file_summary_inventory.py b/tests/test_file_summary_inventory.py new file mode 100644 index 0000000..74758a5 --- /dev/null +++ b/tests/test_file_summary_inventory.py @@ -0,0 +1,24 @@ +from pathlib import Path +import pytest + +from review_agent.file_summary.services.inventory import scan_files_to_items +from review_agent.models import Conversation, 
FileSummaryBatch, FileSummaryItem + + +pytestmark = pytest.mark.django_db + + +def test_scan_files_to_items_preserves_relative_paths(tmp_path, django_user_model): + root = tmp_path / "work" + (root / "a").mkdir(parents=True) + (root / "a" / "one.pdf").write_bytes(b"pdf") + (root / "two.txt").write_text("x", encoding="utf-8") + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + batch = FileSummaryBatch.objects.create(conversation=conversation, user=user, batch_no="FS-I") + + items = scan_files_to_items(batch=batch, roots=[root]) + + assert [item.relative_path for item in items] == ["a/one.pdf", "two.txt"] + assert FileSummaryItem.objects.filter(batch=batch).count() == 2 + assert items[0].statistics_status == FileSummaryItem.StatisticsStatus.SKIPPED diff --git a/tests/test_file_summary_page_count.py b/tests/test_file_summary_page_count.py new file mode 100644 index 0000000..e3c6077 --- /dev/null +++ b/tests/test_file_summary_page_count.py @@ -0,0 +1,66 @@ +import pytest +from docx import Document +from openpyxl import Workbook +from pptx import Presentation + +from review_agent.file_summary.services.page_count import count_document_pages +from review_agent.file_summary.skills.document_page_count import DocumentPageCountSkill +from review_agent.file_summary.skills.base import WorkflowContext +from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem + + +pytestmark = pytest.mark.django_db + + +def test_count_document_pages_for_office_formats(tmp_path): + docx_path = tmp_path / "a.docx" + Document().save(docx_path) + + xlsx_path = tmp_path / "a.xlsx" + workbook = Workbook() + workbook.create_sheet("第二页") + workbook.save(xlsx_path) + + pptx_path = tmp_path / "a.pptx" + presentation = Presentation() + presentation.slides.add_slide(presentation.slide_layouts[6]) + presentation.save(pptx_path) + + assert count_document_pages(docx_path).status in 
{"success", "uncertain"} + assert count_document_pages(xlsx_path).page_count == 2 + assert count_document_pages(pptx_path).page_count == 1 + + +def test_document_page_count_skill_marks_unsupported_and_success(tmp_path, django_user_model): + xlsx_path = tmp_path / "a.xlsx" + workbook = Workbook() + workbook.save(xlsx_path) + txt_path = tmp_path / "a.txt" + txt_path.write_text("x", encoding="utf-8") + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + batch = FileSummaryBatch.objects.create(conversation=conversation, user=user, batch_no="FS-P") + xlsx_item = FileSummaryItem.objects.create( + batch=batch, + file_index=1, + file_name="a.xlsx", + file_type="xlsx", + relative_path="a.xlsx", + storage_path=str(xlsx_path), + ) + txt_item = FileSummaryItem.objects.create( + batch=batch, + file_index=2, + file_name="a.txt", + file_type="txt", + relative_path="a.txt", + storage_path=str(txt_path), + ) + + result = DocumentPageCountSkill().run(WorkflowContext(batch=batch)) + + xlsx_item.refresh_from_db() + txt_item.refresh_from_db() + assert result.success is True + assert xlsx_item.statistics_status == FileSummaryItem.StatisticsStatus.SUCCESS + assert txt_item.statistics_status == FileSummaryItem.StatisticsStatus.UNSUPPORTED diff --git a/tests/test_file_summary_product_detect.py b/tests/test_file_summary_product_detect.py new file mode 100644 index 0000000..8cf895c --- /dev/null +++ b/tests/test_file_summary_product_detect.py @@ -0,0 +1,29 @@ +import pytest + +from review_agent.file_summary.services.product_detect import detect_product_name +from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem + + +pytestmark = pytest.mark.django_db + + +def test_detect_product_name_from_top_level_directory(django_user_model): + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, 
title="新对话 06-06") + batch = FileSummaryBatch.objects.create(conversation=conversation, user=user, batch_no="FS-D") + FileSummaryItem.objects.create( + batch=batch, + file_index=1, + file_name="说明书.docx", + file_type="docx", + relative_path="甲型试剂盒/说明书.docx", + storage_path="x", + ) + + product_name = detect_product_name(batch) + + batch.refresh_from_db() + conversation.refresh_from_db() + assert product_name == "甲型试剂盒" + assert batch.product_name == "甲型试剂盒" + assert conversation.title == "甲型试剂盒-文件汇总" diff --git a/tests/test_file_summary_skills.py b/tests/test_file_summary_skills.py new file mode 100644 index 0000000..a700155 --- /dev/null +++ b/tests/test_file_summary_skills.py @@ -0,0 +1,27 @@ +import pytest + +from review_agent.file_summary.skills.base import BaseSkill, SkillResult, WorkflowContext +from review_agent.file_summary.skills.registry import SkillRegistry + + +class EchoSkill(BaseSkill): + name = "echo" + + def run(self, context): + return SkillResult(success=True, data={"batch_id": context.batch.id}) + + +@pytest.mark.django_db +def test_skill_registry_executes_registered_skill(django_user_model): + from review_agent.models import Conversation, FileSummaryBatch + + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + batch = FileSummaryBatch.objects.create(conversation=conversation, user=user, batch_no="FS-X") + registry = SkillRegistry() + registry.register(EchoSkill()) + + result = registry.execute("echo", WorkflowContext(batch=batch)) + + assert result.success is True + assert result.data == {"batch_id": batch.id} From 61bd31790b523990900110ba959d09ae3a4d6545 Mon Sep 17 00:00:00 2001 From: bruce Date: Sat, 6 Jun 2026 01:22:49 +0800 Subject: [PATCH 5/7] =?UTF-8?q?feat(file-summary):=20=E7=94=9F=E6=88=90?= =?UTF-8?q?=E6=B1=87=E6=80=BB=E6=8A=A5=E5=91=8A=E5=92=8C=E5=AF=BC=E5=87=BA?= =?UTF-8?q?=E4=B8=8B=E8=BD=BD?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../file_summary/services/export_excel.py | 54 ++++++++++++ review_agent/file_summary/services/report.py | 65 +++++++++++++++ .../file_summary/skills/summary_report.py | 33 ++++++++ review_agent/file_summary/views.py | 21 ++++- review_agent/file_summary/workflow.py | 4 +- review_agent/urls.py | 7 +- tests/test_file_summary_report.py | 82 +++++++++++++++++++ tests/test_file_summary_views.py | 25 +++++- 8 files changed, 286 insertions(+), 5 deletions(-) create mode 100644 review_agent/file_summary/services/export_excel.py create mode 100644 review_agent/file_summary/services/report.py create mode 100644 review_agent/file_summary/skills/summary_report.py create mode 100644 tests/test_file_summary_report.py diff --git a/review_agent/file_summary/services/export_excel.py b/review_agent/file_summary/services/export_excel.py new file mode 100644 index 0000000..2b968f3 --- /dev/null +++ b/review_agent/file_summary/services/export_excel.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from pathlib import Path + +from openpyxl import Workbook + +from review_agent.models import ExportedSummaryFile, FileSummaryBatch + + +def _exports_dir(batch: FileSummaryBatch) -> Path: + root = Path(batch.work_dir or Path("media") / "file_summary" / batch.batch_no) + export_dir = root / "exports" + export_dir.mkdir(parents=True, exist_ok=True) + return export_dir + + +def generate_excel_export(batch: FileSummaryBatch) -> ExportedSummaryFile: + workbook = Workbook() + summary = workbook.active + summary.title = "汇总信息" + summary.append(["批次号", batch.batch_no]) + summary.append(["产品名称", batch.product_name or "-"]) + summary.append(["文件总数", batch.total_files]) + summary.append(["统计成功", batch.success_files]) + summary.append(["统计失败", batch.failed_files]) + summary.append(["不支持", batch.unsupported_files]) + summary.append(["不确定", batch.uncertain_files]) + summary.append(["总页数", batch.total_pages]) + + detail = 
workbook.create_sheet("文件明细") + detail.append(["序号", "目录层级", "文件名", "类型", "页数", "路径", "状态", "重试次数", "异常说明"]) + for item in batch.items.order_by("file_index"): + detail.append( + [ + item.file_index, + item.directory_level, + item.file_name, + item.file_type, + item.page_count, + item.relative_path, + item.statistics_status, + item.retry_count, + item.error_message, + ] + ) + + path = _exports_dir(batch) / f"{batch.batch_no}-summary.xlsx" + workbook.save(path) + return ExportedSummaryFile.objects.create( + batch=batch, + export_type=ExportedSummaryFile.ExportType.EXCEL, + file_name=path.name, + storage_path=str(path), + ) diff --git a/review_agent/file_summary/services/report.py b/review_agent/file_summary/services/report.py new file mode 100644 index 0000000..78220f4 --- /dev/null +++ b/review_agent/file_summary/services/report.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from pathlib import Path + +from review_agent.models import ExportedSummaryFile, FileSummaryBatch + + +def _exports_dir(batch: FileSummaryBatch) -> Path: + root = Path(batch.work_dir or Path("media") / "file_summary" / batch.batch_no) + export_dir = root / "exports" + export_dir.mkdir(parents=True, exist_ok=True) + return export_dir + + +def build_summary_table(batch: FileSummaryBatch) -> str: + lines = [ + "| 序号 | 目录层级 | 文件名 | 类型 | 页数 | 状态 | 异常说明 |", + "| --- | --- | --- | --- | --- | --- | --- |", + ] + for item in batch.items.order_by("file_index"): + lines.append( + "| {index} | {directory} | {name} | {file_type} | {pages} | {status} | {error} |".format( + index=item.file_index, + directory=item.directory_level or "-", + name=item.file_name, + file_type=item.file_type, + pages=item.page_count if item.page_count is not None else "-", + status=item.statistics_status, + error=item.error_message or "-", + ) + ) + return "\n".join(lines) + + +def build_markdown_report(batch: FileSummaryBatch) -> str: + return "\n\n".join( + [ + f"# 文件目录与页数汇总报告\n\n批次号:{batch.batch_no}", + ( + "## 
汇总信息\n\n" + f"- 产品名称:{batch.product_name or '-'}\n" + f"- 文件总数:{batch.total_files}\n" + f"- 统计成功:{batch.success_files}\n" + f"- 统计失败:{batch.failed_files}\n" + f"- 不支持:{batch.unsupported_files}\n" + f"- 不确定:{batch.uncertain_files}\n" + f"- 总页数:{batch.total_pages}" + ), + "## 文件明细\n\n" + build_summary_table(batch), + "## 处理说明\n\n单文件失败不会阻断批次,失败与不确定文件已在明细中标注。", + ] + ) + + +def generate_markdown_report(batch: FileSummaryBatch) -> tuple[ExportedSummaryFile, str]: + content = build_markdown_report(batch) + path = _exports_dir(batch) / f"{batch.batch_no}-summary.md" + path.write_text(content, encoding="utf-8") + exported = ExportedSummaryFile.objects.create( + batch=batch, + export_type=ExportedSummaryFile.ExportType.MARKDOWN, + file_name=path.name, + storage_path=str(path), + ) + return exported, build_summary_table(batch) diff --git a/review_agent/file_summary/skills/summary_report.py b/review_agent/file_summary/skills/summary_report.py new file mode 100644 index 0000000..3e0c043 --- /dev/null +++ b/review_agent/file_summary/skills/summary_report.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from django.urls import reverse + +from review_agent.models import Message + +from ..services.export_excel import generate_excel_export +from ..services.report import generate_markdown_report +from .base import BaseSkill, SkillResult, WorkflowContext + + +class SummaryReportSkill(BaseSkill): + name = "summary_report" + + def run(self, context: WorkflowContext) -> SkillResult: + markdown_export, summary_table = generate_markdown_report(context.batch) + excel_export = generate_excel_export(context.batch) + markdown_url = reverse("file_summary_export_download", args=[markdown_export.pk]) + excel_url = reverse("file_summary_export_download", args=[excel_export.pk]) + content = ( + "文件目录与页数汇总已完成。\n\n" + f"{summary_table}\n\n" + f"[下载 Markdown 报告]({markdown_url}) | [下载 Excel 明细]({excel_url})" + ) + Message.objects.create( + conversation=context.batch.conversation, + 
role=Message.Role.ASSISTANT, + content=content, + ) + return SkillResult( + success=True, + data={"markdown_export_id": markdown_export.pk, "excel_export_id": excel_export.pk}, + ) diff --git a/review_agent/file_summary/views.py b/review_agent/file_summary/views.py index fa4d169..6bee16e 100644 --- a/review_agent/file_summary/views.py +++ b/review_agent/file_summary/views.py @@ -1,8 +1,10 @@ from django.contrib.auth.decorators import login_required -from django.http import Http404, JsonResponse +from pathlib import Path + +from django.http import FileResponse, Http404, JsonResponse from django.views.decorators.http import require_http_methods -from review_agent.models import Conversation, FileAttachment +from review_agent.models import Conversation, ExportedSummaryFile, FileAttachment from review_agent.models import FileSummaryBatch, WorkflowEvent from .events import serialize_event @@ -105,3 +107,18 @@ def batch_events(request, batch_id: int): after_id = 0 events = WorkflowEvent.objects.filter(batch=batch, pk__gt=after_id).order_by("id") return JsonResponse({"events": [serialize_event(event) for event in events]}) + + +@require_http_methods(["GET"]) +@login_required +def export_download(request, export_id: int): + exported = ExportedSummaryFile.objects.filter( + pk=export_id, + batch__user=request.user, + ).first() + if not exported: + raise Http404("导出文件不存在。") + path = Path(exported.storage_path) + if not path.exists(): + return JsonResponse({"error": "文件不存在。"}, status=404) + return FileResponse(path.open("rb"), as_attachment=True, filename=exported.file_name) diff --git a/review_agent/file_summary/workflow.py b/review_agent/file_summary/workflow.py index 65b517f..050ee88 100644 --- a/review_agent/file_summary/workflow.py +++ b/review_agent/file_summary/workflow.py @@ -22,6 +22,7 @@ from .skills.document_page_count import DocumentPageCountSkill from .skills.file_inventory import FileInventorySkill from .skills.product_detect import ProductDetectSkill from 
.skills.registry import SkillRegistry +from .skills.summary_report import SummaryReportSkill NODE_DEFINITIONS = [ @@ -30,7 +31,7 @@ NODE_DEFINITIONS = [ ("inventory", "文件扫描", "file_inventory"), ("page_count", "页数统计", "document_page_count"), ("product_detect", "产品识别", "product_detect"), - ("report", "报告输出", ""), + ("report", "报告输出", "summary_report"), ("complete", "完成", ""), ] @@ -41,6 +42,7 @@ def default_skill_registry() -> SkillRegistry: registry.register(FileInventorySkill()) registry.register(DocumentPageCountSkill()) registry.register(ProductDetectSkill()) + registry.register(SummaryReportSkill()) return registry diff --git a/review_agent/urls.py b/review_agent/urls.py index 5f6fac3..737071d 100644 --- a/review_agent/urls.py +++ b/review_agent/urls.py @@ -1,6 +1,6 @@ from django.urls import path -from .file_summary.views import attachment_detail, attachments, batch_events, batch_status +from .file_summary.views import attachment_detail, attachments, batch_events, batch_status, export_download urlpatterns = [ @@ -29,4 +29,9 @@ urlpatterns = [ batch_events, name="file_summary_batch_events", ), + path( + "api/review-agent/file-summary/exports//download/", + export_download, + name="file_summary_export_download", + ), ] diff --git a/tests/test_file_summary_report.py b/tests/test_file_summary_report.py new file mode 100644 index 0000000..aecc240 --- /dev/null +++ b/tests/test_file_summary_report.py @@ -0,0 +1,82 @@ +from pathlib import Path +import pytest +from openpyxl import load_workbook + +from review_agent.file_summary.services.export_excel import generate_excel_export +from review_agent.file_summary.services.report import generate_markdown_report +from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem, Message + + +pytestmark = pytest.mark.django_db + + +def make_batch(tmp_path, django_user_model): + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, 
title="会话") + batch = FileSummaryBatch.objects.create( + conversation=conversation, + user=user, + batch_no="FS-R", + work_dir=str(tmp_path), + total_files=1, + success_files=1, + total_pages=2, + ) + FileSummaryItem.objects.create( + batch=batch, + file_index=1, + file_name="a.xlsx", + file_type="xlsx", + relative_path="a.xlsx", + storage_path=str(tmp_path / "a.xlsx"), + page_count=2, + statistics_status=FileSummaryItem.StatisticsStatus.SUCCESS, + ) + return batch + + +def test_generate_markdown_report_creates_export_and_summary(tmp_path, django_user_model): + batch = make_batch(tmp_path, django_user_model) + + exported, summary = generate_markdown_report(batch) + + assert exported.export_type == "markdown" + assert Path(exported.storage_path).exists() + assert "| 序号 | 目录层级 | 文件名 | 类型 | 页数 | 状态 | 异常说明 |" in summary + assert "a.xlsx" in Path(exported.storage_path).read_text(encoding="utf-8") + + +def test_generate_excel_export_contains_summary_and_items(tmp_path, django_user_model): + batch = make_batch(tmp_path, django_user_model) + + exported = generate_excel_export(batch) + + workbook = load_workbook(exported.storage_path) + assert workbook.sheetnames == ["汇总信息", "文件明细"] + assert workbook["文件明细"]["C2"].value == "a.xlsx" + + +def test_workflow_report_node_writes_assistant_message(tmp_path, settings, django_user_model): + from review_agent.file_summary.workflow import create_file_summary_batch, start_file_summary_workflow + from review_agent.models import FileAttachment + + settings.MEDIA_ROOT = tmp_path + settings.FILE_SUMMARY_ASYNC = False + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + file_path = tmp_path / "a.xlsx" + file_path.write_bytes(b"not a real workbook") + FileAttachment.objects.create( + conversation=conversation, + user=user, + original_name="a.txt", + storage_path=str(file_path), + file_size=file_path.stat().st_size, + ) + batch = 
create_file_summary_batch(conversation=conversation, user=user) + batch.work_dir = str(tmp_path / "batch") + batch.save(update_fields=["work_dir"]) + + start_file_summary_workflow(batch, async_run=False) + + assert Message.objects.filter(conversation=conversation, role=Message.Role.ASSISTANT).exists() diff --git a/tests/test_file_summary_views.py b/tests/test_file_summary_views.py index bbf8745..eeff753 100644 --- a/tests/test_file_summary_views.py +++ b/tests/test_file_summary_views.py @@ -2,7 +2,7 @@ from django.core.files.uploadedfile import SimpleUploadedFile from django.urls import reverse import pytest -from review_agent.models import Conversation, FileAttachment +from review_agent.models import Conversation, ExportedSummaryFile, FileAttachment, FileSummaryBatch pytestmark = pytest.mark.django_db @@ -73,3 +73,26 @@ def test_delete_attachment_is_logical_and_scoped(client, settings, tmp_path, dja assert response.status_code == 200 assert attachment.upload_status == FileAttachment.UploadStatus.DELETED assert attachment.is_active is False + + +def test_export_download_requires_batch_owner(client, tmp_path, django_user_model): + owner = django_user_model.objects.create_user(username="owner", password="pass") + other = django_user_model.objects.create_user(username="other", password="pass") + conversation = Conversation.objects.create(user=owner, title="会话") + batch = FileSummaryBatch.objects.create(conversation=conversation, user=owner, batch_no="FS-DL") + report_path = tmp_path / "summary.md" + report_path.write_text("ok", encoding="utf-8") + exported = ExportedSummaryFile.objects.create( + batch=batch, + export_type=ExportedSummaryFile.ExportType.MARKDOWN, + file_name="summary.md", + storage_path=str(report_path), + ) + + client.force_login(other) + denied = client.get(reverse("file_summary_export_download", args=[exported.pk])) + assert denied.status_code == 404 + + client.force_login(owner) + allowed = client.get(reverse("file_summary_export_download", 
args=[exported.pk])) + assert allowed.status_code == 200 From a917a18ca1f01cb3321a9d081856e7596128391a Mon Sep 17 00:00:00 2001 From: bruce Date: Sat, 6 Jun 2026 10:25:11 +0800 Subject: [PATCH 6/7] =?UTF-8?q?feat(file-summary):=20=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E5=89=8D=E7=AB=AF=E6=B1=87=E6=80=BB=E9=9D=A2=E6=9D=BF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/5.开发计划/1.自动汇总-前端线框图.md | 74 +++++++++ review_agent/views.py | 3 + static/css/login.css | 180 ++++++++++++++++++++- static/js/app.js | 189 ++++++++++++++++++++++- templates/home.html | 68 ++++++++ tests/test_file_summary_frontend.py | 22 +++ 6 files changed, 529 insertions(+), 7 deletions(-) create mode 100644 docs/5.开发计划/1.自动汇总-前端线框图.md create mode 100644 tests/test_file_summary_frontend.py diff --git a/docs/5.开发计划/1.自动汇总-前端线框图.md b/docs/5.开发计划/1.自动汇总-前端线框图.md new file mode 100644 index 0000000..3bb0ed1 --- /dev/null +++ b/docs/5.开发计划/1.自动汇总-前端线框图.md @@ -0,0 +1,74 @@ +# 自动汇总前端线框图 + +## 评审目标 + +在实现三栏页面前,先确认审核智能体工作台的信息架构、右侧文件汇总面板、工作流状态展示和移动端降级方式。 + +## 桌面端布局 + +```mermaid +flowchart LR + A["左栏:会话列表
新对话 / 搜索 / 历史会话"] --> B["中栏:聊天区
顶部导航 / 消息流 / 输入框"] + B --> C["右栏:文件汇总面板"] + C --> C1["上半区:上传区
拖拽上传 / 选择文件 / 上传状态"] + C --> C2["中段:当前对话附件
文件名 / 版本 / 大小 / 状态 / 删除"] + C --> C3["下半区:工作流卡片
批次号 / 节点进度 / 下载入口"] +``` + +## 右侧面板结构 + +```mermaid +flowchart TB + P["文件汇总面板"] --> U["上传拖拽区"] + U --> U0["无附件:提示上传文件或压缩包"] + U --> U1["上传中:显示文件名和处理中状态"] + U --> U2["上传失败:展示错误并允许重试"] + P --> L["附件列表"] + L --> L1["active 版本优先展示"] + L --> L2["历史版本保留展示"] + L --> L3["逻辑删除后从默认候选移除"] + P --> W["工作流卡片列表"] + W --> W1["运行中:节点逐项更新"] + W --> W2["成功:展示 Markdown/Excel 下载"] + W --> W3["失败:展示失败节点和错误说明"] +``` + +## 工作流状态流转 + +```mermaid +stateDiagram-v2 + [*] --> Pending: 用户上传附件 + Pending --> Running: 发送自动汇总提示词 + Running --> Extracting: 固化附件 + Extracting --> Scanning: 解压完成或跳过 + Scanning --> Counting: 生成文件清单 + Counting --> Detecting: 页数统计完成 + Detecting --> Reporting: 产品名识别完成 + Reporting --> Success: 生成报告与下载 + Running --> Failed: 批次级异常 + Extracting --> Failed: 解压安全检查失败 + Reporting --> Failed: 报告生成失败 + Success --> Restored: 刷新页面后状态恢复 + Failed --> Restored: 刷新页面后状态恢复 +``` + +## 移动端布局 + +```mermaid +flowchart TB + M["移动端工作台"] --> T["顶部:侧栏按钮 / 当前页面 / 用户菜单"] + T --> Chat["聊天区优先展示"] + Chat --> Composer["底部输入框"] + T --> Drawer["会话侧栏抽屉"] + Chat --> Panel["文件汇总面板下移或折叠"] + Panel --> Upload["上传区"] + Panel --> Workflow["工作流卡片"] +``` + +## 关键评审点 + +- 桌面端保持左侧会话、中间聊天、右侧文件汇总三栏,不改变现有聊天主路径。 +- 右侧面板上半部分用于上传和附件列表,下半部分用于批次工作流卡片。 +- 工作流卡片节点顺序固定为:附件固化、压缩包解压、文件扫描、页数统计、产品识别、报告输出、完成。 +- 助手消息中的文件汇总结果使用安全 Markdown 渲染,用户消息仍按纯文本转义。 +- 移动端优先保证聊天可用,文件汇总面板折叠或下移,不能遮挡输入框。 diff --git a/review_agent/views.py b/review_agent/views.py index e384834..a2aa67e 100644 --- a/review_agent/views.py +++ b/review_agent/views.py @@ -10,6 +10,7 @@ from .services import ( send_message, stream_message, ) +from .models import FileAttachment, FileSummaryBatch @login_required @@ -49,6 +50,8 @@ def workspace(request: HttpRequest) -> HttpResponse: "conversations": conversations, "current_conversation": current, "messages": current.messages.all() if current else [], + "attachments": FileAttachment.objects.filter(conversation=current).order_by("original_name", "-version_no") if current else [], + "summary_batches": 
FileSummaryBatch.objects.filter(conversation=current).prefetch_related("node_runs").order_by("-created_at")[:5] if current else [], }, ) diff --git a/static/css/login.css b/static/css/login.css index 7f4f93f..3162919 100644 --- a/static/css/login.css +++ b/static/css/login.css @@ -127,7 +127,7 @@ input:focus { .workspace { display: grid; - grid-template-columns: 296px minmax(0, 1fr); + grid-template-columns: 296px minmax(0, 1fr) 340px; min-height: 100vh; } @@ -760,9 +760,176 @@ input:focus { padding-right: 12px; } +.summary-panel { + display: grid; + grid-template-rows: auto auto minmax(0, 1fr); + gap: 14px; + min-width: 0; + max-height: 100vh; + padding: 16px; + overflow: auto; + border-left: 1px solid var(--line); + background: #ffffff; +} + +.summary-section { + display: grid; + gap: 12px; + padding: 14px; + border: 1px solid var(--line); + border-radius: 8px; + background: var(--panel-soft); +} + +.summary-heading, +.summary-subheading, +.workflow-card header { + display: flex; + align-items: center; + justify-content: space-between; + gap: 12px; +} + +.summary-heading h2, +.summary-subheading h3 { + margin: 0; + font-size: 16px; +} + +.summary-heading span { + color: var(--muted); + font-size: 12px; +} + +.upload-dropzone { + display: grid; + place-items: center; + gap: 6px; + min-height: 112px; + padding: 18px; + border: 1px dashed var(--accent); + border-radius: 8px; + background: #f5f9ff; + color: var(--text); + cursor: pointer; + text-align: center; +} + +.upload-dropzone.dragging { + border-color: var(--accent-dark); + background: #eaf2ff; +} + +.upload-dropzone span, +.upload-status, +.attachment-item span, +.workflow-card em { + color: var(--muted); + font-size: 12px; +} + +.upload-status { + margin: 0; + line-height: 1.5; +} + +.attachment-list, +.workflow-card-list { + display: grid; + gap: 10px; +} + +.attachment-item, +.workflow-card { + display: grid; + gap: 10px; + padding: 12px; + border: 1px solid var(--line); + border-radius: 8px; + background: 
#ffffff; +} + +.attachment-item { + grid-template-columns: minmax(0, 1fr) auto; + align-items: center; +} + +.attachment-item strong, +.workflow-card strong { + display: block; + overflow-wrap: anywhere; + font-size: 13px; +} + +.attachment-item em, +.workflow-status { + padding: 3px 8px; + border-radius: 999px; + background: #eaf2ff; + color: var(--accent); + font-size: 11px; + font-style: normal; + font-weight: 700; +} + +.workflow-card ol { + display: grid; + gap: 8px; + margin: 0; + padding: 0; + list-style: none; +} + +.node-status { + display: flex; + align-items: center; + justify-content: space-between; + gap: 10px; + padding: 8px 0; + border-top: 1px solid var(--line); + font-size: 13px; +} + +.status-running, +.status-retrying { + color: var(--accent); +} + +.status-success { + color: #047857; +} + +.status-failed { + color: var(--danger-text); +} + +.panel-empty { + padding: 14px; + border: 1px dashed var(--line); + border-radius: 8px; + color: var(--muted); + text-align: center; +} + +.message-bubble table { + width: 100%; + border-collapse: collapse; + font-size: 13px; +} + +.message-bubble th, +.message-bubble td { + padding: 8px; + border: 1px solid var(--line); + text-align: left; + vertical-align: top; +} + @media (max-width: 980px) { .workspace { grid-template-columns: minmax(0, 1fr); + min-height: 100vh; + overflow: auto; } .sidebar { @@ -815,7 +982,14 @@ input:focus { } .chat-stage { - height: calc(100vh - 88px); + min-height: calc(100vh - 88px); + height: auto; + } + + .summary-panel { + max-height: none; + border-left: 0; + border-top: 1px solid var(--line); } .chat-scroll { @@ -889,7 +1063,7 @@ input:focus { width: 20px; } -.node-dot { + .node-dot { width: 10px; height: 10px; } diff --git a/static/js/app.js b/static/js/app.js index 1c3ee89..e8d2155 100644 --- a/static/js/app.js +++ b/static/js/app.js @@ -11,6 +11,12 @@ var sendButton = document.getElementById("sendButton"); var conversationIdInput = 
document.getElementById("conversationIdInput"); var chatStage = document.querySelector(".chat-stage"); + var summaryPanel = document.getElementById("summaryPanel"); + var uploadDropzone = document.getElementById("uploadDropzone"); + var attachmentInput = document.getElementById("attachmentInput"); + var attachmentList = document.getElementById("attachmentList"); + var uploadStatus = document.getElementById("uploadStatus"); + var workflowCardList = document.getElementById("workflowCardList"); var nodeAnchors = []; if (!workspace) { @@ -32,7 +38,7 @@ function syncSidebarState() { if (isMobile()) { - if (workspace.getAttribute("data-sidebar-state") === "collapsed") { + if (workspace.getAttribute("data-sidebar-state") !== "closed") { workspace.setAttribute("data-sidebar-state", "closed"); } } else if (workspace.getAttribute("data-sidebar-state") === "closed") { @@ -147,6 +153,13 @@ return escapeHtml(text).replace(/\n/g, "
"); } + function renderAssistantContent(text) { + if (window.marked && window.DOMPurify) { + return window.DOMPurify.sanitize(window.marked.parse(text || "")); + } + return nl2br(text || ""); + } + function scrollChatToBottom() { if (chatScroll) { chatScroll.scrollTop = chatScroll.scrollHeight; @@ -169,7 +182,7 @@ bubble.className = "message-bubble"; var text = document.createElement("p"); - text.innerHTML = nl2br(content); + text.innerHTML = role === "assistant" ? renderAssistantContent(content) : nl2br(content); bubble.appendChild(text); article.appendChild(avatar); @@ -271,6 +284,149 @@ } } + function currentConversationId() { + return conversationIdInput ? conversationIdInput.value : ""; + } + + function templateUrl(attributeName, token, value) { + if (!summaryPanel) { + return ""; + } + return summaryPanel.getAttribute(attributeName).replace(token, value); + } + + function renderAttachments(attachments) { + if (!attachmentList) { + return; + } + attachmentList.innerHTML = ""; + if (!attachments.length) { + attachmentList.innerHTML = '
暂无附件
'; + return; + } + attachments.forEach(function (attachment) { + var item = document.createElement("div"); + item.className = "attachment-item"; + item.setAttribute("data-attachment-id", attachment.id); + item.innerHTML = + "
" + + escapeHtml(attachment.original_name) + + "v" + + attachment.version_no + + " · " + + attachment.file_size + + " bytes · " + + escapeHtml(attachment.upload_status) + + "
" + + (attachment.is_active ? "active" : ""); + attachmentList.appendChild(item); + }); + } + + async function refreshAttachments() { + var conversationId = currentConversationId(); + if (!conversationId || !summaryPanel) { + return; + } + var response = await fetch(templateUrl("data-attachment-url-template", "__conversation_id__", conversationId)); + if (!response.ok) { + return; + } + var payload = await response.json(); + renderAttachments(payload.attachments || []); + } + + async function uploadFiles(files) { + var conversationId = currentConversationId(); + if (!conversationId || !files.length || !summaryPanel) { + if (uploadStatus) { + uploadStatus.textContent = "请先创建或选择一个对话。"; + } + return; + } + var data = new FormData(); + Array.prototype.forEach.call(files, function (file) { + data.append("files", file); + }); + var csrf = new FormData(composer).get("csrfmiddlewaretoken"); + if (uploadStatus) { + uploadStatus.textContent = "正在上传 " + files.length + " 个文件..."; + } + try { + var response = await fetch(templateUrl("data-attachment-url-template", "__conversation_id__", conversationId), { + method: "POST", + headers: { "X-CSRFToken": csrf }, + body: data, + }); + if (!response.ok) { + throw new Error("上传失败。"); + } + var payload = await response.json(); + renderAttachments(payload.attachments || []); + if (uploadStatus) { + uploadStatus.textContent = "上传完成,可发送自动汇总提示词。"; + } + await refreshAttachments(); + } catch (error) { + if (uploadStatus) { + uploadStatus.textContent = "上传失败,请重试。"; + } + } + } + + function ensureWorkflowCard(batch) { + if (!workflowCardList || !batch) { + return null; + } + var empty = workflowCardList.querySelector(".panel-empty"); + if (empty) { + empty.remove(); + } + var card = workflowCardList.querySelector('[data-batch-id="' + batch.batch_id + '"]'); + if (card) { + return card; + } + card = document.createElement("article"); + card.className = "workflow-card"; + card.setAttribute("data-batch-id", batch.batch_id); + card.innerHTML = + "
" + + escapeHtml(batch.batch_no || "文件汇总") + + 'running
    '; + workflowCardList.prepend(card); + return card; + } + + async function refreshWorkflowCard(batchId) { + if (!summaryPanel || !batchId) { + return; + } + var response = await fetch(templateUrl("data-status-url-template", "__batch_id__", batchId)); + if (!response.ok) { + return; + } + var payload = await response.json(); + var card = ensureWorkflowCard({ + batch_id: payload.batch.id, + batch_no: payload.batch.batch_no, + }); + if (!card) { + return; + } + var status = card.querySelector(".workflow-status"); + status.textContent = payload.batch.status; + status.className = "workflow-status status-" + payload.batch.status; + var list = card.querySelector("ol"); + list.innerHTML = ""; + (payload.nodes || []).forEach(function (node) { + var item = document.createElement("li"); + item.className = "node-status status-" + node.status; + item.setAttribute("data-node-code", node.node_code); + item.innerHTML = "" + escapeHtml(node.node_name) + "" + node.progress + "%"; + list.appendChild(item); + }); + } + async function streamChat(event) { event.preventDefault(); if (!composer || !promptInput || !sendButton || !chatStage) { @@ -356,11 +512,14 @@ } } else if (eventName === "chunk") { assistantText += payload.delta || ""; - assistantMessage.text.innerHTML = nl2br(assistantText); + assistantMessage.text.innerHTML = renderAssistantContent(assistantText); scrollChatToBottom(); } else if (eventName === "error") { assistantText = payload.message || "模型调用失败。"; - assistantMessage.text.innerHTML = nl2br(assistantText); + assistantMessage.text.innerHTML = renderAssistantContent(assistantText); + } else if (eventName === "workflow_started") { + ensureWorkflowCard(payload); + refreshWorkflowCard(payload.batch_id); } else if (eventName === "done") { if (payload.assistant_message_id) { assistantMessage.article.id = "message-" + payload.assistant_message_id; @@ -400,6 +559,28 @@ composer.addEventListener("submit", streamChat); } + if (uploadDropzone && attachmentInput) { + 
uploadDropzone.addEventListener("click", function () { + attachmentInput.click(); + }); + uploadDropzone.addEventListener("dragover", function (event) { + event.preventDefault(); + uploadDropzone.classList.add("dragging"); + }); + uploadDropzone.addEventListener("dragleave", function () { + uploadDropzone.classList.remove("dragging"); + }); + uploadDropzone.addEventListener("drop", function (event) { + event.preventDefault(); + uploadDropzone.classList.remove("dragging"); + uploadFiles(event.dataTransfer.files); + }); + attachmentInput.addEventListener("change", function () { + uploadFiles(attachmentInput.files); + attachmentInput.value = ""; + }); + } + window.addEventListener("resize", syncSidebarState); syncSidebarState(); })(); diff --git a/templates/home.html b/templates/home.html index 88c8c26..87f9ccd 100644 --- a/templates/home.html +++ b/templates/home.html @@ -164,9 +164,77 @@ + + {% endblock %} {% block scripts %} + + {% endblock %} diff --git a/tests/test_file_summary_frontend.py b/tests/test_file_summary_frontend.py new file mode 100644 index 0000000..71d0318 --- /dev/null +++ b/tests/test_file_summary_frontend.py @@ -0,0 +1,22 @@ +import pytest +from django.urls import reverse + +from review_agent.models import Conversation + + +pytestmark = pytest.mark.django_db + + +def test_workspace_renders_summary_panel(client, django_user_model): + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + client.force_login(user) + + response = client.get(f"{reverse('home')}?conversation={conversation.pk}") + + assert response.status_code == 200 + content = response.content.decode("utf-8") + assert 'id="summaryPanel"' in content + assert 'id="uploadDropzone"' in content + assert 'id="workflowCardList"' in content + assert "自动汇总文件目录与页数" in content From 684682f86dcefe793aac9f78c24f887da6120e12 Mon Sep 17 00:00:00 2001 From: bruce Date: Sat, 6 Jun 2026 10:27:23 +0800 
Subject: [PATCH 7/7] =?UTF-8?q?docs(file-summary):=20=E8=A1=A5=E5=85=85?= =?UTF-8?q?=E9=83=A8=E7=BD=B2=E5=AD=98=E5=82=A8=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 3f52755..c4cc26f 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,11 @@ Docker 或生产环境如需处理 `.7z` 与 `.rar` 压缩包,还需要安装 ```bash 7z +7z i ``` LibreOffice 不是必需依赖,仅作为未来增强老格式文档解析的可选能力。 + +上传原始文件、批次工作目录和导出文件默认存储在 Django `MEDIA_ROOT` 下的 +`file_summary/users/<user_id>/<conversation_id>/` 或批次 `work_dir` 目录中。生产环境 +需要把 `MEDIA_ROOT` 挂载到持久化卷,并纳入备份或归档策略。