fix(application-form-fill): 清理填表说明并收窄按钮话术
This commit is contained in:
@@ -28,6 +28,15 @@ FIELD_ALIASES = {
|
||||
"storage_condition_and_validity": ["产品储存条件及有效期", "储存条件及有效期", "储存条件", "有效期"],
|
||||
}
|
||||
|
||||
# Fixed labels that are not derived from any field's aliases.
# _all_field_labels seeds its label list with these entries before adding the
# per-field aliases. Presumably that combined list is what the extraction
# helpers receive as `labels` to decide where a field's value ends, so text
# following one of these headings is not absorbed into the preceding field —
# TODO confirm against the callers.
STATIC_STOP_LABELS = [
|
||||
"申请人",
|
||||
"国家药品监督管理局",
|
||||
"填表说明",
|
||||
"注",
|
||||
"保证书",
|
||||
"应附资料",
|
||||
]
|
||||
|
||||
|
||||
def collect_document_texts(summary_batch: FileSummaryBatch) -> dict[str, str]:
|
||||
texts: dict[str, str] = {}
|
||||
@@ -180,7 +189,7 @@ def _field_aliases(field: dict[str, str]) -> list[str]:
|
||||
|
||||
|
||||
def _all_field_labels(fields: list[dict[str, str]]) -> list[str]:
|
||||
labels: list[str] = []
|
||||
labels: list[str] = list(STATIC_STOP_LABELS)
|
||||
for field in fields:
|
||||
for label in _field_aliases(field):
|
||||
if label not in labels:
|
||||
@@ -194,7 +203,7 @@ def _extract_label_value(text: str, label: str, labels: list[str]) -> tuple[str,
|
||||
|
||||
def _extract_colon_label_value(text: str, label: str, labels: list[str]) -> tuple[str, str]:
|
||||
escaped_labels = "|".join(re.escape(item) for item in labels if item != label)
|
||||
stop_pattern = rf"(?=\n\s*(?:{escaped_labels})\s*[::])" if escaped_labels else r"(?=\Z)"
|
||||
stop_pattern = rf"(?=\n\s*(?:{escaped_labels})(?:\s*[::]|\s*$))" if escaped_labels else r"(?=\Z)"
|
||||
pattern = re.compile(rf"{re.escape(label)}\s*[::]\s*(.+?)(?:{stop_pattern}|\Z)", re.S)
|
||||
match = pattern.search(text or "")
|
||||
if not match:
|
||||
|
||||
Reference in New Issue
Block a user