feat(file-summary): 添加文件汇总数据模型

This commit is contained in:
2026-06-06 01:11:11 +08:00
parent b96ab1303a
commit 855afcdee3
6 changed files with 906 additions and 0 deletions

View File

@@ -18,3 +18,15 @@ python manage.py runserver
- 登录页:http://127.0.0.1:8000/login/
- 首页:http://127.0.0.1:8000/
- 管理后台:http://127.0.0.1:8000/admin/
## 文件汇总依赖
自动汇总文件目录与页数功能使用轻量 Python 库读取 PDF、Word、Excel、PowerPoint 文件。
Docker 或生产环境如需处理 `.7z`、`.rar` 压缩包,还需要安装系统 `7z`/`p7zip`
命令,并确认以下命令可用:
```bash
7z
```
LibreOffice 不是必需依赖,仅作为未来增强老格式文档解析的可选能力。

3
pytest.ini Normal file
View File

@@ -0,0 +1,3 @@
[pytest]
DJANGO_SETTINGS_MODULE = config.settings
python_files = tests.py test_*.py *_tests.py

View File

@@ -1 +1,8 @@
Django>=5.0,<6.0 Django>=5.0,<6.0
pypdf>=5.0
python-docx>=1.1
python-pptx>=1.0
openpyxl>=3.1
xlrd>=2.0
olefile>=0.47
py7zr>=0.21

View File

@@ -0,0 +1,481 @@
# Generated by Django 5.2.14 on 2026-06-05 17:09
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the file-summary tables with their indexes and unique constraints.

    Adds seven models to the ``review_agent`` app: FileAttachment,
    FileSummaryBatch, ExportedSummaryFile, FileSummaryBatchAttachment,
    FileSummaryItem, WorkflowEvent and WorkflowNodeRun. The tables are created
    first; the indexes and unique constraints are attached afterwards by
    separate AddIndex / AddConstraint operations.
    """

    # Requires the initial review_agent schema (conversation / message tables
    # are referenced below) and whichever user model the project has configured.
    dependencies = [
        ("review_agent", "0001_initial"),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]
    operations = [
        # ---- Table: ra_file_attachment ----
        migrations.CreateModel(
            name="FileAttachment",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("original_name", models.CharField(max_length=255)),
                ("version_no", models.PositiveIntegerField(default=1)),
                ("is_active", models.BooleanField(default=True)),
                ("storage_path", models.CharField(max_length=500)),
                ("file_size", models.BigIntegerField(default=0)),
                (
                    "content_type",
                    models.CharField(blank=True, default="", max_length=120),
                ),
                (
                    "upload_status",
                    models.CharField(
                        choices=[
                            ("uploaded", "已上传"),
                            ("bound", "已绑定"),
                            ("deleted", "已删除"),
                        ],
                        default="uploaded",
                        max_length=20,
                    ),
                ),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                (
                    "conversation",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="file_attachments",
                        to="review_agent.conversation",
                    ),
                ),
                (
                    "user",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="review_file_attachments",
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                "db_table": "ra_file_attachment",
                "ordering": ["-created_at", "-id"],
            },
        ),
        # ---- Table: ra_file_summary_batch ----
        migrations.CreateModel(
            name="FileSummaryBatch",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("batch_no", models.CharField(max_length=64, unique=True)),
                (
                    "product_name",
                    models.CharField(blank=True, default="", max_length=200),
                ),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "待执行"),
                            ("running", "执行中"),
                            ("success", "成功"),
                            ("failed", "失败"),
                        ],
                        default="pending",
                        max_length=20,
                    ),
                ),
                ("total_files", models.IntegerField(default=0)),
                ("supported_files", models.IntegerField(default=0)),
                ("success_files", models.IntegerField(default=0)),
                ("failed_files", models.IntegerField(default=0)),
                ("unsupported_files", models.IntegerField(default=0)),
                ("uncertain_files", models.IntegerField(default=0)),
                ("total_pages", models.IntegerField(default=0)),
                ("work_dir", models.CharField(blank=True, default="", max_length=500)),
                ("error_message", models.TextField(blank=True, default="")),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("started_at", models.DateTimeField(blank=True, null=True)),
                ("finished_at", models.DateTimeField(blank=True, null=True)),
                (
                    "conversation",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="file_summary_batches",
                        to="review_agent.conversation",
                    ),
                ),
                (
                    # Nullable: deleting the message keeps the batch (SET_NULL).
                    "trigger_message",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name="triggered_file_summary_batches",
                        to="review_agent.message",
                    ),
                ),
                (
                    "user",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="review_file_summary_batches",
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                "db_table": "ra_file_summary_batch",
                "ordering": ["-created_at", "-id"],
            },
        ),
        # ---- Table: ra_exported_summary_file ----
        migrations.CreateModel(
            name="ExportedSummaryFile",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "export_type",
                    models.CharField(
                        choices=[("markdown", "Markdown"), ("excel", "Excel")],
                        max_length=20,
                    ),
                ),
                ("file_name", models.CharField(max_length=255)),
                ("storage_path", models.CharField(max_length=500)),
                (
                    "status",
                    models.CharField(
                        choices=[("success", "成功"), ("failed", "失败")],
                        default="success",
                        max_length=20,
                    ),
                ),
                ("error_message", models.TextField(blank=True, default="")),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                (
                    "batch",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="exports",
                        to="review_agent.filesummarybatch",
                    ),
                ),
            ],
            options={
                "db_table": "ra_exported_summary_file",
                "ordering": ["-created_at", "-id"],
            },
        ),
        # ---- Table: ra_file_summary_batch_attachment (batch <-> attachment link) ----
        migrations.CreateModel(
            name="FileSummaryBatchAttachment",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "source_role",
                    models.CharField(
                        choices=[("archive", "压缩包"), ("multi_file", "多文件")],
                        default="multi_file",
                        max_length=20,
                    ),
                ),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                (
                    "attachment",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="batch_bindings",
                        to="review_agent.fileattachment",
                    ),
                ),
                (
                    "batch",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="batch_attachments",
                        to="review_agent.filesummarybatch",
                    ),
                ),
            ],
            options={
                "db_table": "ra_file_summary_batch_attachment",
            },
        ),
        # ---- Table: ra_file_summary_item ----
        migrations.CreateModel(
            name="FileSummaryItem",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("file_index", models.PositiveIntegerField()),
                (
                    "directory_level",
                    models.CharField(blank=True, default="", max_length=300),
                ),
                ("file_name", models.CharField(max_length=255)),
                ("file_type", models.CharField(max_length=20)),
                ("relative_path", models.CharField(max_length=500)),
                ("storage_path", models.CharField(max_length=500)),
                ("page_count", models.IntegerField(blank=True, null=True)),
                (
                    "statistics_status",
                    models.CharField(
                        choices=[
                            ("success", "成功"),
                            ("failed", "失败"),
                            ("unsupported", "不支持"),
                            ("uncertain", "不确定"),
                            ("skipped", "跳过"),
                        ],
                        default="skipped",
                        max_length=20,
                    ),
                ),
                ("retry_count", models.PositiveIntegerField(default=0)),
                ("error_message", models.TextField(blank=True, default="")),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                (
                    "batch",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="items",
                        to="review_agent.filesummarybatch",
                    ),
                ),
            ],
            options={
                "db_table": "ra_file_summary_item",
                "ordering": ["file_index", "id"],
            },
        ),
        # ---- Table: ra_workflow_event ----
        migrations.CreateModel(
            name="WorkflowEvent",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("event_type", models.CharField(max_length=40)),
                ("payload", models.JSONField(default=dict)),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                (
                    "batch",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="events",
                        to="review_agent.filesummarybatch",
                    ),
                ),
            ],
            options={
                "db_table": "ra_workflow_event",
                "ordering": ["id"],
            },
        ),
        # ---- Table: ra_workflow_node_run ----
        migrations.CreateModel(
            name="WorkflowNodeRun",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("node_code", models.CharField(max_length=40)),
                ("node_name", models.CharField(max_length=80)),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "等待中"),
                            ("running", "执行中"),
                            ("retrying", "重试中"),
                            ("success", "成功"),
                            ("failed", "失败"),
                            ("skipped", "跳过"),
                        ],
                        default="pending",
                        max_length=20,
                    ),
                ),
                ("progress", models.PositiveIntegerField(default=0)),
                ("message", models.TextField(blank=True, default="")),
                ("started_at", models.DateTimeField(blank=True, null=True)),
                ("finished_at", models.DateTimeField(blank=True, null=True)),
                (
                    "batch",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="node_runs",
                        to="review_agent.filesummarybatch",
                    ),
                ),
            ],
            options={
                "db_table": "ra_workflow_node_run",
            },
        ),
        # ---- Indexes / constraints: fileattachment ----
        migrations.AddIndex(
            model_name="fileattachment",
            index=models.Index(
                fields=["conversation", "created_at"],
                name="idx_ra_attachment_conv_created",
            ),
        ),
        migrations.AddIndex(
            model_name="fileattachment",
            index=models.Index(
                fields=["user", "created_at"], name="idx_ra_attachment_user_created"
            ),
        ),
        migrations.AddIndex(
            model_name="fileattachment",
            index=models.Index(
                fields=["conversation", "original_name", "is_active"],
                name="idx_ra_attachment_active",
            ),
        ),
        # A version number may be used once per file name within a conversation.
        migrations.AddConstraint(
            model_name="fileattachment",
            constraint=models.UniqueConstraint(
                fields=("conversation", "original_name", "version_no"),
                name="uq_ra_attachment_conv_name_version",
            ),
        ),
        # ---- Indexes: filesummarybatch ----
        migrations.AddIndex(
            model_name="filesummarybatch",
            index=models.Index(
                fields=["conversation", "created_at"], name="idx_ra_batch_conv_created"
            ),
        ),
        migrations.AddIndex(
            model_name="filesummarybatch",
            index=models.Index(
                fields=["user", "created_at"], name="idx_ra_batch_user_created"
            ),
        ),
        migrations.AddIndex(
            model_name="filesummarybatch",
            index=models.Index(
                fields=["status", "created_at"], name="idx_ra_batch_status"
            ),
        ),
        # ---- Indexes: exportedsummaryfile ----
        migrations.AddIndex(
            model_name="exportedsummaryfile",
            index=models.Index(
                fields=["batch", "export_type"], name="idx_ra_export_batch_type"
            ),
        ),
        migrations.AddIndex(
            model_name="exportedsummaryfile",
            index=models.Index(
                fields=["batch", "created_at"], name="idx_ra_export_batch_created"
            ),
        ),
        # ---- Indexes / constraints: filesummarybatchattachment ----
        migrations.AddIndex(
            model_name="filesummarybatchattachment",
            index=models.Index(
                fields=["batch", "created_at"], name="idx_ra_batch_attachment_batch"
            ),
        ),
        migrations.AddIndex(
            model_name="filesummarybatchattachment",
            index=models.Index(fields=["attachment"], name="idx_ra_batch_attach_file"),
        ),
        # An attachment can be bound to a given batch only once.
        migrations.AddConstraint(
            model_name="filesummarybatchattachment",
            constraint=models.UniqueConstraint(
                fields=("batch", "attachment"), name="uq_ra_batch_attachment"
            ),
        ),
        # ---- Indexes / constraints: filesummaryitem ----
        migrations.AddIndex(
            model_name="filesummaryitem",
            index=models.Index(
                fields=["batch", "file_index"], name="idx_ra_item_batch_index"
            ),
        ),
        migrations.AddIndex(
            model_name="filesummaryitem",
            index=models.Index(
                fields=["batch", "statistics_status"], name="idx_ra_item_batch_status"
            ),
        ),
        migrations.AddIndex(
            model_name="filesummaryitem",
            index=models.Index(
                fields=["batch", "file_type"], name="idx_ra_item_batch_type"
            ),
        ),
        # A relative path may appear only once inside a batch.
        migrations.AddConstraint(
            model_name="filesummaryitem",
            constraint=models.UniqueConstraint(
                fields=("batch", "relative_path"), name="uq_ra_item_batch_relative_path"
            ),
        ),
        # ---- Indexes: workflowevent ----
        migrations.AddIndex(
            model_name="workflowevent",
            index=models.Index(fields=["batch", "id"], name="idx_ra_event_batch_id"),
        ),
        migrations.AddIndex(
            model_name="workflowevent",
            index=models.Index(
                fields=["batch", "created_at"], name="idx_ra_event_batch_created"
            ),
        ),
        # ---- Indexes / constraints: workflownoderun ----
        migrations.AddIndex(
            model_name="workflownoderun",
            index=models.Index(
                fields=["batch", "status"], name="idx_ra_node_batch_status"
            ),
        ),
        # Each node code has at most one run row per batch.
        migrations.AddConstraint(
            model_name="workflownoderun",
            constraint=models.UniqueConstraint(
                fields=("batch", "node_code"), name="uq_ra_node_batch_code"
            ),
        ),
    ]

View File

@@ -42,3 +42,293 @@ class Message(models.Model):
def __str__(self) -> str: def __str__(self) -> str:
return f"{self.get_role_display()} - {self.conversation_id}" return f"{self.get_role_display()} - {self.conversation_id}"
class FileAttachment(models.Model):
    """Stores an uploaded file version for one conversation.

    Rows are unique per (conversation, original_name, version_no) and are
    returned newest first by default. The string form is
    ``"<original_name> v<version_no>"``.
    """

    class UploadStatus(models.TextChoices):
        # Each member is (stored value, human-readable label).
        UPLOADED = "uploaded", "已上传"
        BOUND = "bound", "已绑定"
        DELETED = "deleted", "已删除"

    # Deleting the conversation or the user removes the attachment (CASCADE).
    conversation = models.ForeignKey(
        Conversation,
        on_delete=models.CASCADE,
        related_name="file_attachments",
    )
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="review_file_attachments",
    )
    original_name = models.CharField(max_length=255)
    version_no = models.PositiveIntegerField(default=1)
    # NOTE(review): presumably flags the current version of a file name;
    # nothing in the schema limits a name to one active row -- confirm
    # against the upload logic.
    is_active = models.BooleanField(default=True)
    storage_path = models.CharField(max_length=500)
    # NOTE(review): unit is presumably bytes -- confirm against the writer.
    file_size = models.BigIntegerField(default=0)
    content_type = models.CharField(max_length=120, blank=True, default="")
    upload_status = models.CharField(
        max_length=20,
        choices=UploadStatus.choices,
        default=UploadStatus.UPLOADED,
    )
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "ra_file_attachment"
        ordering = ["-created_at", "-id"]
        constraints = [
            # A version number may be used once per file name in a conversation.
            models.UniqueConstraint(
                fields=["conversation", "original_name", "version_no"],
                name="uq_ra_attachment_conv_name_version",
            )
        ]
        indexes = [
            models.Index(
                fields=["conversation", "created_at"],
                name="idx_ra_attachment_conv_created",
            ),
            models.Index(
                fields=["user", "created_at"],
                name="idx_ra_attachment_user_created",
            ),
            models.Index(
                fields=["conversation", "original_name", "is_active"],
                name="idx_ra_attachment_active",
            ),
        ]

    def __str__(self) -> str:
        return f"{self.original_name} v{self.version_no}"
class FileSummaryBatch(models.Model):
    """Tracks one automatic file inventory and page-count workflow run.

    A batch belongs to one conversation and one user, carries a globally
    unique ``batch_no`` (also used as its string form), and is returned
    newest first by default.
    """

    class Status(models.TextChoices):
        # Each member is (stored value, human-readable label).
        PENDING = "pending", "待执行"
        RUNNING = "running", "执行中"
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"

    conversation = models.ForeignKey(
        Conversation,
        on_delete=models.CASCADE,
        related_name="file_summary_batches",
    )
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="review_file_summary_batches",
    )
    # Optional link to a message; deleting that message keeps the batch and
    # nulls this column (SET_NULL).
    trigger_message = models.ForeignKey(
        Message,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="triggered_file_summary_batches",
    )
    batch_no = models.CharField(max_length=64, unique=True)
    product_name = models.CharField(max_length=200, blank=True, default="")
    status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING)
    # Aggregate counters, all starting at 0.
    # NOTE(review): presumably maintained by the workflow as items are
    # processed -- confirm against the service code.
    total_files = models.IntegerField(default=0)
    supported_files = models.IntegerField(default=0)
    success_files = models.IntegerField(default=0)
    failed_files = models.IntegerField(default=0)
    unsupported_files = models.IntegerField(default=0)
    uncertain_files = models.IntegerField(default=0)
    total_pages = models.IntegerField(default=0)
    work_dir = models.CharField(max_length=500, blank=True, default="")
    error_message = models.TextField(blank=True, default="")
    created_at = models.DateTimeField(auto_now_add=True)
    # Both timestamps are nullable and are not set automatically by the model.
    started_at = models.DateTimeField(null=True, blank=True)
    finished_at = models.DateTimeField(null=True, blank=True)

    class Meta:
        db_table = "ra_file_summary_batch"
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(fields=["conversation", "created_at"], name="idx_ra_batch_conv_created"),
            models.Index(fields=["user", "created_at"], name="idx_ra_batch_user_created"),
            models.Index(fields=["status", "created_at"], name="idx_ra_batch_status"),
        ]

    def __str__(self) -> str:
        return self.batch_no
class FileSummaryBatchAttachment(models.Model):
    """Binds a workflow batch to the exact attachment versions it uses.

    Each (batch, attachment) pair may exist only once; deleting either side
    removes the binding (CASCADE).
    """

    class SourceRole(models.TextChoices):
        # Each member is (stored value, human-readable label).
        ARCHIVE = "archive", "压缩包"
        MULTI_FILE = "multi_file", "多文件"

    batch = models.ForeignKey(
        FileSummaryBatch,
        on_delete=models.CASCADE,
        related_name="batch_attachments",
    )
    attachment = models.ForeignKey(
        FileAttachment,
        on_delete=models.CASCADE,
        related_name="batch_bindings",
    )
    source_role = models.CharField(
        max_length=20,
        choices=SourceRole.choices,
        default=SourceRole.MULTI_FILE,
    )
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "ra_file_summary_batch_attachment"
        constraints = [
            models.UniqueConstraint(
                fields=["batch", "attachment"],
                name="uq_ra_batch_attachment",
            )
        ]
        indexes = [
            models.Index(
                fields=["batch", "created_at"],
                name="idx_ra_batch_attachment_batch",
            ),
            models.Index(fields=["attachment"], name="idx_ra_batch_attach_file"),
        ]

    def __str__(self) -> str:
        """Return a readable label built from the two foreign-key ids."""
        # Raw ``*_id`` attributes are used so that rendering the label does
        # not load the related rows.
        return f"batch {self.batch_id} - attachment {self.attachment_id}"
class FileSummaryItem(models.Model):
    """Stores one scanned file and its page-count result.

    Items are ordered by ``file_index`` within their batch, and a
    ``relative_path`` may appear only once per batch.
    """

    class StatisticsStatus(models.TextChoices):
        # Each member is (stored value, human-readable label).
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"
        UNSUPPORTED = "unsupported", "不支持"
        UNCERTAIN = "uncertain", "不确定"
        SKIPPED = "skipped", "跳过"

    batch = models.ForeignKey(
        FileSummaryBatch,
        on_delete=models.CASCADE,
        related_name="items",
    )
    file_index = models.PositiveIntegerField()
    directory_level = models.CharField(max_length=300, blank=True, default="")
    file_name = models.CharField(max_length=255)
    file_type = models.CharField(max_length=20)
    relative_path = models.CharField(max_length=500)
    storage_path = models.CharField(max_length=500)
    # Nullable: no page count is stored until one is written explicitly.
    page_count = models.IntegerField(null=True, blank=True)
    statistics_status = models.CharField(
        max_length=20,
        choices=StatisticsStatus.choices,
        default=StatisticsStatus.SKIPPED,
    )
    retry_count = models.PositiveIntegerField(default=0)
    error_message = models.TextField(blank=True, default="")
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "ra_file_summary_item"
        ordering = ["file_index", "id"]
        constraints = [
            models.UniqueConstraint(
                fields=["batch", "relative_path"],
                name="uq_ra_item_batch_relative_path",
            )
        ]
        indexes = [
            models.Index(fields=["batch", "file_index"], name="idx_ra_item_batch_index"),
            models.Index(fields=["batch", "statistics_status"], name="idx_ra_item_batch_status"),
            models.Index(fields=["batch", "file_type"], name="idx_ra_item_batch_type"),
        ]

    def __str__(self) -> str:
        """Return the item's position and path, e.g. ``"3. docs/a.pdf"``."""
        return f"{self.file_index}. {self.relative_path}"
class WorkflowNodeRun(models.Model):
    """Stores recoverable status for one workflow node.

    A batch holds at most one row per ``node_code``.
    """

    class Status(models.TextChoices):
        # Each member is (stored value, human-readable label).
        PENDING = "pending", "等待中"
        RUNNING = "running", "执行中"
        RETRYING = "retrying", "重试中"
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"
        SKIPPED = "skipped", "跳过"

    batch = models.ForeignKey(
        FileSummaryBatch,
        on_delete=models.CASCADE,
        related_name="node_runs",
    )
    node_code = models.CharField(max_length=40)
    node_name = models.CharField(max_length=80)
    status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING)
    # NOTE(review): presumably a 0-100 percentage; the schema only enforces
    # non-negativity -- confirm against the workflow code.
    progress = models.PositiveIntegerField(default=0)
    message = models.TextField(blank=True, default="")
    started_at = models.DateTimeField(null=True, blank=True)
    finished_at = models.DateTimeField(null=True, blank=True)

    class Meta:
        db_table = "ra_workflow_node_run"
        constraints = [
            models.UniqueConstraint(fields=["batch", "node_code"], name="uq_ra_node_batch_code")
        ]
        indexes = [
            models.Index(fields=["batch", "status"], name="idx_ra_node_batch_status"),
        ]

    def __str__(self) -> str:
        """Return the node code followed by its status label."""
        return f"{self.node_code} - {self.get_status_display()}"
class WorkflowEvent(models.Model):
    """Persists workflow events for SSE replay and diagnostics.

    Events are returned in insertion (``id``) order by default.
    """

    batch = models.ForeignKey(
        FileSummaryBatch,
        on_delete=models.CASCADE,
        related_name="events",
    )
    event_type = models.CharField(max_length=40)
    # ``default=dict`` passes the callable, so each row gets its own new dict.
    payload = models.JSONField(default=dict)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "ra_workflow_event"
        ordering = ["id"]
        indexes = [
            models.Index(fields=["batch", "id"], name="idx_ra_event_batch_id"),
            models.Index(fields=["batch", "created_at"], name="idx_ra_event_batch_created"),
        ]

    def __str__(self) -> str:
        """Return the event type followed by the owning batch id."""
        # ``batch_id`` is the raw FK column, so no related row is loaded.
        return f"{self.event_type} - {self.batch_id}"
class ExportedSummaryFile(models.Model):
    """Stores generated report files for permission-checked download.

    Each export belongs to one batch and is returned newest first by default.
    """

    class ExportType(models.TextChoices):
        # Each member is (stored value, human-readable label).
        MARKDOWN = "markdown", "Markdown"
        EXCEL = "excel", "Excel"

    class Status(models.TextChoices):
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"

    batch = models.ForeignKey(
        FileSummaryBatch,
        on_delete=models.CASCADE,
        related_name="exports",
    )
    export_type = models.CharField(max_length=20, choices=ExportType.choices)
    file_name = models.CharField(max_length=255)
    storage_path = models.CharField(max_length=500)
    status = models.CharField(max_length=20, choices=Status.choices, default=Status.SUCCESS)
    error_message = models.TextField(blank=True, default="")
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "ra_exported_summary_file"
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(fields=["batch", "export_type"], name="idx_ra_export_batch_type"),
            models.Index(fields=["batch", "created_at"], name="idx_ra_export_batch_created"),
        ]

    def __str__(self) -> str:
        """Return the file name followed by its export-type label."""
        return f"{self.file_name} ({self.get_export_type_display()})"

View File

@@ -0,0 +1,113 @@
import pytest
from django.contrib.auth import get_user_model
from django.db import IntegrityError, transaction
from review_agent.models import (
Conversation,
ExportedSummaryFile,
FileAttachment,
FileSummaryBatch,
FileSummaryBatchAttachment,
FileSummaryItem,
)
# Module-level marker: pytest applies ``django_db`` to every test in this file.
pytestmark = pytest.mark.django_db
def create_user(username="u1"):
    """Create and return a user on the active user model with a fixed test password."""
    user_model = get_user_model()
    return user_model.objects.create_user(username=username, password="pass")
def test_attachment_versions_are_unique_per_conversation_and_name():
    """Distinct versions of one file name coexist; a repeated version is rejected."""
    owner = create_user()
    chat = Conversation.objects.create(user=owner, title="会话")
    # Fields shared by every attachment created in this scenario.
    shared = {"conversation": chat, "user": owner, "original_name": "资料.docx"}

    v1 = FileAttachment.objects.create(
        version_no=1,
        is_active=False,
        storage_path="media/a.docx",
        file_size=10,
        **shared,
    )
    v2 = FileAttachment.objects.create(
        version_no=2,
        storage_path="media/b.docx",
        file_size=12,
        **shared,
    )

    assert v1.version_no == 1
    assert v2.version_no == 2

    # The failing insert runs inside its own atomic block.
    with pytest.raises(IntegrityError):
        with transaction.atomic():
            FileAttachment.objects.create(
                version_no=2,
                storage_path="media/c.docx",
                file_size=14,
                **shared,
            )
def test_batch_attachment_and_item_unique_constraints():
    """A batch rejects a duplicate attachment binding and a duplicate item path."""
    owner = create_user()
    chat = Conversation.objects.create(user=owner, title="会话")
    uploaded = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="资料.docx",
        storage_path="media/a.docx",
        file_size=10,
    )
    run = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-001")

    # Binding the same attachment to the same batch twice must fail.
    binding = {"batch": run, "attachment": uploaded}
    FileSummaryBatchAttachment.objects.create(**binding)
    with pytest.raises(IntegrityError):
        with transaction.atomic():
            FileSummaryBatchAttachment.objects.create(**binding)

    # Two items in one batch may not share a relative path, even when their
    # file_index values differ.
    item_fields = {
        "batch": run,
        "file_name": "资料.docx",
        "file_type": "docx",
        "relative_path": "资料.docx",
        "storage_path": "media/a.docx",
    }
    FileSummaryItem.objects.create(file_index=1, **item_fields)
    with pytest.raises(IntegrityError):
        with transaction.atomic():
            FileSummaryItem.objects.create(file_index=2, **item_fields)
def test_exported_file_traces_to_user_and_conversation():
    """An exported file reaches its owning user and conversation through its batch."""
    owner = create_user()
    chat = Conversation.objects.create(user=owner, title="会话")
    run = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-002")

    report = ExportedSummaryFile.objects.create(
        batch=run,
        export_type=ExportedSummaryFile.ExportType.MARKDOWN,
        file_name="summary.md",
        storage_path="media/summary.md",
    )

    parent = report.batch
    assert parent.user == owner
    assert parent.conversation == chat