From 503cc55b220995da881431c05b2a845c1046681a Mon Sep 17 00:00:00 2001 From: KeepingRunning <1599949878@qq.com> Date: Tue, 20 May 2025 03:27:46 +0800 Subject: [PATCH 1/6] =?UTF-8?q?fix:=20=E4=BF=9D=E6=8C=81=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B=E4=B8=80=E8=87=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/official_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config/official_configs.py b/src/config/official_configs.py index 6ad4648b..807b9e8c 100644 --- a/src/config/official_configs.py +++ b/src/config/official_configs.py @@ -64,7 +64,7 @@ class IdentityConfig(ConfigBase): height: int = 170 """身高(单位:厘米)""" - weight: float = 50 + weight: int = 50 """体重(单位:千克)""" age: int = 18 From 10a6f91df49a3593d2476e1e174c8192cdc13377 Mon Sep 17 00:00:00 2001 From: KeepingRunning <1599949878@qq.com> Date: Wed, 21 May 2025 02:00:42 +0800 Subject: [PATCH 2/6] =?UTF-8?q?fix:=20=E5=85=A8=E9=83=A8=E6=94=B9=E6=88=90?= =?UTF-8?q?float?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/official_configs.py | 2 +- src/individuality/identity.py | 6 +++--- src/individuality/individuality.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/config/official_configs.py b/src/config/official_configs.py index 807b9e8c..6ad4648b 100644 --- a/src/config/official_configs.py +++ b/src/config/official_configs.py @@ -64,7 +64,7 @@ class IdentityConfig(ConfigBase): height: int = 170 """身高(单位:厘米)""" - weight: int = 50 + weight: float = 50 """体重(单位:千克)""" age: int = 18 diff --git a/src/individuality/identity.py b/src/individuality/identity.py index fd0d70f3..f79da547 100644 --- a/src/individuality/identity.py +++ b/src/individuality/identity.py @@ -8,7 +8,7 @@ class Identity: identity_detail: List[str] # 身份细节描述 height: int # 身高(厘米) - weight: int # 体重(千克) + weight: float # 体重(千克) age: int # 年龄 gender: str # 性别 appearance: str # 外貌特征 @@ -24,7 +24,7 @@ class Identity: self, identity_detail: List[str] = None, height: int = 0, - weight: int = 0, + weight: float = 0, age: int = 0, gender: str = "", appearance: str = "", @@ -61,7 +61,7 @@ class Identity: @classmethod def initialize( - cls, identity_detail: List[str], height: int, weight: int, age: int, gender: str, appearance: str + cls, identity_detail: List[str], height: int, weight: float, age: int, gender: str, appearance: str ) -> "Identity": """初始化身份特征 diff --git a/src/individuality/individuality.py b/src/individuality/individuality.py index 38131ea1..27d50268 100644 --- a/src/individuality/individuality.py +++ b/src/individuality/individuality.py @@ -46,7 +46,7 @@ class Individuality: personality_sides: list, identity_detail: list, height: int, - weight: int, + weight: float, age: int, gender: str, appearance: str, From 8f4489a14566c9b1e5bccda6d0beca979d3f9739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A2=A8=E6=A2=93=E6=9F=92?= <1787882683@qq.com> Date: Wed, 21 May 2025 09:16:01 +0800 Subject: [PATCH 3/6] =?UTF-8?q?feat:=20=E5=B0=9D=E8=AF=95=E4=BD=BF?= =?UTF-8?q?=E7=94=A8json=5Frepair=E5=BA=93=E4=BB=A3=E6=9B=BFHRAG=E7=9A=84J?= =?UTF-8?q?SON=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/knowledge/src/utils/json_fix.py | 76 ++++-------------------- 1 file changed, 12 insertions(+), 64 deletions(-) diff --git a/src/chat/knowledge/src/utils/json_fix.py b/src/chat/knowledge/src/utils/json_fix.py index a83eb491..46da2d73 100644 --- a/src/chat/knowledge/src/utils/json_fix.py +++ b/src/chat/knowledge/src/utils/json_fix.py @@ -1,76 +1,24 @@ import json +from json_repair import repair_json +# 以下代码用于修复损坏的 JSON 字符串。 -def _find_unclosed(json_str): - """ - Identifies the unclosed braces and brackets in the JSON string. - - Args: - json_str (str): The JSON string to analyze. - - Returns: - list: A list of unclosed elements in the order they were opened. - """ - unclosed = [] - inside_string = False - escape_next = False - - for char in json_str: - if inside_string: - if escape_next: - escape_next = False - elif char == "\\": - escape_next = True - elif char == '"': - inside_string = False - else: - if char == '"': - inside_string = True - elif char in "{[": - unclosed.append(char) - elif char in "}]": - if unclosed and ((char == "}" and unclosed[-1] == "{") or (char == "]" and unclosed[-1] == "[")): - unclosed.pop() - - return unclosed - - -# The following code is used to fix a broken JSON string. -# From HippoRAG2 (GitHub: OSU-NLP-Group/HippoRAG) def fix_broken_generated_json(json_str: str) -> str: """ - Fixes a malformed JSON string by: - - Removing the last comma and any trailing content. - - Iterating over the JSON string once to determine and fix unclosed braces or brackets. - - Ensuring braces and brackets inside string literals are not considered. + 使用 json-repair 库修复格式错误的 JSON 字符串。 - If the original json_str string can be successfully loaded by json.loads(), will directly return it without any modification. + 如果原始 json_str 字符串可以被 json.loads() 成功加载,则直接返回而不进行任何修改。 - Args: - json_str (str): The malformed JSON string to be fixed. + 参数: + json_str (str): 需要修复的格式错误的 JSON 字符串。 - Returns: - str: The corrected JSON string. + 返回: + str: 修复后的 JSON 字符串。 """ - try: - # Try to load the JSON to see if it is valid + # 尝试加载 JSON 以查看其是否有效 json.loads(json_str) - return json_str # Return as-is if valid + return json_str # 如果有效则按原样返回 except json.JSONDecodeError: - pass - - # Step 1: Remove trailing content after the last comma. - last_comma_index = json_str.rfind(",") - if last_comma_index != -1: - json_str = json_str[:last_comma_index] - - # Step 2: Identify unclosed braces and brackets. - unclosed_elements = _find_unclosed(json_str) - - # Step 3: Append the necessary closing elements in reverse order of opening. - closing_map = {"{": "}", "[": "]"} - for open_char in reversed(unclosed_elements): - json_str += closing_map[open_char] - - return json_str + # 如果无效,则尝试修复它 + return repair_json(json_str) From 3b89eb6ea8a97ca7a045a08df639500257edf883 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 21 May 2025 01:16:20 +0000 Subject: [PATCH 4/6] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/knowledge/src/utils/json_fix.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/chat/knowledge/src/utils/json_fix.py b/src/chat/knowledge/src/utils/json_fix.py index 46da2d73..e9b13ea1 100644 --- a/src/chat/knowledge/src/utils/json_fix.py +++ b/src/chat/knowledge/src/utils/json_fix.py @@ -3,6 +3,7 @@ from json_repair import repair_json # 以下代码用于修复损坏的 JSON 字符串。 + def fix_broken_generated_json(json_str: str) -> str: """ 使用 json-repair 库修复格式错误的 JSON 字符串。 From e82a5f7d47bfe41d7d85ec1b6e3b40774f31d614 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A2=A8=E6=A2=93=E6=9F=92?= <1787882683@qq.com> Date: Wed, 21 May 2025 09:20:17 +0800 Subject: [PATCH 5/6] =?UTF-8?q?feat:=20=E4=BF=9D=E7=95=99=E5=8E=9F?= =?UTF-8?q?=E6=9D=A5=E7=9A=84=E4=BF=AE=E5=A4=8D=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/knowledge/src/ie_process.py | 6 +- src/chat/knowledge/src/utils/json_fix.py | 76 +++++++++++++++++++++++- 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/src/chat/knowledge/src/ie_process.py b/src/chat/knowledge/src/ie_process.py index 0bbe2169..ddc5eb02 100644 --- a/src/chat/knowledge/src/ie_process.py +++ b/src/chat/knowledge/src/ie_process.py @@ -6,7 +6,7 @@ from .global_logger import logger from . import prompt_template from .lpmmconfig import global_config, INVALID_ENTITY from .llm_client import LLMClient -from .utils.json_fix import fix_broken_generated_json +from .utils.json_fix import new_fix_broken_generated_json def _entity_extract(llm_client: LLMClient, paragraph: str) -> List[str]: @@ -24,7 +24,7 @@ def _entity_extract(llm_client: LLMClient, paragraph: str) -> List[str]: if "]" in request_result: request_result = request_result[: request_result.rindex("]") + 1] - entity_extract_result = json.loads(fix_broken_generated_json(request_result)) + entity_extract_result = json.loads(new_fix_broken_generated_json(request_result)) entity_extract_result = [ entity @@ -53,7 +53,7 @@ def _rdf_triple_extract(llm_client: LLMClient, paragraph: str, entities: list) - if "]" in request_result: request_result = request_result[: request_result.rindex("]") + 1] - entity_extract_result = json.loads(fix_broken_generated_json(request_result)) + entity_extract_result = json.loads(new_fix_broken_generated_json(request_result)) for triple in entity_extract_result: if len(triple) != 3 or (triple[0] is None or triple[1] is None or triple[2] is None) or "" in triple: diff --git a/src/chat/knowledge/src/utils/json_fix.py b/src/chat/knowledge/src/utils/json_fix.py index e9b13ea1..dd91499e 100644 --- a/src/chat/knowledge/src/utils/json_fix.py +++ b/src/chat/knowledge/src/utils/json_fix.py @@ -1,10 +1,82 @@ import json from json_repair import repair_json -# 以下代码用于修复损坏的 JSON 字符串。 +def _find_unclosed(json_str): + """ + Identifies the unclosed braces and brackets in the JSON string. + + Args: + json_str (str): The JSON string to analyze. + + Returns: + list: A list of unclosed elements in the order they were opened. + """ + unclosed = [] + inside_string = False + escape_next = False + + for char in json_str: + if inside_string: + if escape_next: + escape_next = False + elif char == "\\": + escape_next = True + elif char == '"': + inside_string = False + else: + if char == '"': + inside_string = True + elif char in "{[": + unclosed.append(char) + elif char in "}]": + if unclosed and ((char == "}" and unclosed[-1] == "{") or (char == "]" and unclosed[-1] == "[")): + unclosed.pop() + + return unclosed +# The following code is used to fix a broken JSON string. +# From HippoRAG2 (GitHub: OSU-NLP-Group/HippoRAG) def fix_broken_generated_json(json_str: str) -> str: + """ + Fixes a malformed JSON string by: + - Removing the last comma and any trailing content. + - Iterating over the JSON string once to determine and fix unclosed braces or brackets. + - Ensuring braces and brackets inside string literals are not considered. + + If the original json_str string can be successfully loaded by json.loads(), will directly return it without any modification. + + Args: + json_str (str): The malformed JSON string to be fixed. + + Returns: + str: The corrected JSON string. + """ + + try: + # Try to load the JSON to see if it is valid + json.loads(json_str) + return json_str # Return as-is if valid + except json.JSONDecodeError: + pass + + # Step 1: Remove trailing content after the last comma. + last_comma_index = json_str.rfind(",") + if last_comma_index != -1: + json_str = json_str[:last_comma_index] + + # Step 2: Identify unclosed braces and brackets. + unclosed_elements = _find_unclosed(json_str) + + # Step 3: Append the necessary closing elements in reverse order of opening. + closing_map = {"{": "}", "[": "]"} + for open_char in reversed(unclosed_elements): + json_str += closing_map[open_char] + + return json_str + + +def new_fix_broken_generated_json(json_str: str) -> str: """ 使用 json-repair 库修复格式错误的 JSON 字符串。 @@ -22,4 +94,4 @@ def fix_broken_generated_json(json_str: str) -> str: return json_str # 如果有效则按原样返回 except json.JSONDecodeError: # 如果无效,则尝试修复它 - return repair_json(json_str) + return repair_json(json_str) \ No newline at end of file From 6730b60e0a5823eac2d66e4567f81f05d9908a4e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 21 May 2025 01:20:30 +0000 Subject: [PATCH 6/6] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/knowledge/src/utils/json_fix.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/chat/knowledge/src/utils/json_fix.py b/src/chat/knowledge/src/utils/json_fix.py index dd91499e..53fa8f36 100644 --- a/src/chat/knowledge/src/utils/json_fix.py +++ b/src/chat/knowledge/src/utils/json_fix.py @@ -1,6 +1,7 @@ import json from json_repair import repair_json + def _find_unclosed(json_str): """ Identifies the unclosed braces and brackets in the JSON string. @@ -94,4 +95,4 @@ def new_fix_broken_generated_json(json_str: str) -> str: return json_str # 如果有效则按原样返回 except json.JSONDecodeError: # 如果无效,则尝试修复它 - return repair_json(json_str) \ No newline at end of file + return repair_json(json_str)