feat: 校验根据映射表添加作者链接

2025-07-26 19:41:11 +08:00
parent 1351e9dc09
commit f0201f39e5
30 changed files with 379 additions and 137 deletions
--- a/build/author_config.json
+++ b/build/author_config.json
@@ -1,6 +1,7 @@
 {
    "rename": {
-        "起个名字好难": "起个名字好难的喵"
+        "起个名字好难": "起个名字好难的喵",
+        "this-Fish": "蜜柑魚"
    },
    "links": {
        "秋云": "https://github.com/physligl",
--- a/build/pathing_authors.py
+++ b/build/pathing_authors.py
@@ -1,136 +1,169 @@
 import os
-import sys
 import json

-# 获取配置文件路径（和脚本在同一目录）
-script_dir = os.path.dirname(os.path.abspath(__file__))
-config_path = os.path.join(script_dir, "author_config.json")
+def process_json_authors(input_path, config_path="author_config.json", verbose=True):
+    """
+    Process author info in JSON files (author → authors migration, author renames, link unification).
+
+    Args:
+        input_path (str): path of the .json file or directory to process
+        config_path (str): config file path; the default is a relative name, so it is resolved against the current working directory
+        verbose (bool): whether to print detailed log messages
+
+    Returns:
+        dict: "total_files" and "modified_files" counters plus an "errors" list of (file_path, message) tuples
+    """
+    result = {
+        "total_files": 0,
+        "modified_files": 0,
+        "errors": []
+    }

-# 获取要处理的文件夹路径
-if len(sys.argv) < 2:
-    print("❌ 用法：python pathing_authors.py <JSON目录路径>")
-    sys.exit(1)
+    if not os.path.exists(input_path):
+        raise FileNotFoundError(f"路径不存在：{input_path}")
+    if not os.path.exists(config_path):
+        raise FileNotFoundError(f"配置文件不存在：{config_path}")

-folder_path = sys.argv[1]
+    # Load the rename/link configuration
+    try:
+        with open(config_path, "r", encoding="utf-8") as f:
+            config = json.load(f)
+    except Exception as e:
+        raise RuntimeError(f"配置文件加载失败：{e}")

-if not os.path.exists(folder_path):
-    print(f"❌ JSON目录不存在：{folder_path}")
-    sys.exit(1)
-if not os.path.exists(config_path):
-    print(f"❌ 配置文件不存在：{config_path}")
-    sys.exit(1)
+    author_rename = config.get("rename", {})
+    author_links = config.get("links", {})

-# 加载配置
-try:
-    with open(config_path, "r", encoding="utf-8") as f:
-        config = json.load(f)
-except Exception as e:
-    print(f"❌ 配置文件加载失败：{e}")
-    sys.exit(1)
+    # Build the list of files to process
+    file_list = []
+    if os.path.isfile(input_path) and input_path.endswith(".json"):
+        file_list.append(input_path)
+    elif os.path.isdir(input_path):
+        for root, dirs, files in os.walk(input_path):
+            for filename in files:
+                if filename.endswith(".json"):
+                    file_list.append(os.path.join(root, filename))
+    else:
+        raise ValueError("输入路径必须是 .json 文件或目录")

-author_rename = config.get("rename", {})
-author_links = config.get("links", {})
-
-print(f"🚀 启动，处理目录：{folder_path}")
-count_total = 0
-count_modified = 0
-
-for root, dirs, files in os.walk(folder_path):
-    for filename in files:
-        if filename.endswith(".json"):
-            count_total += 1
-            file_path = os.path.join(root, filename)
+    for file_path in file_list:
+        result["total_files"] += 1
+        if verbose:
             print(f"\n🔍 处理文件：{file_path}")

-            try:
-                with open(file_path, "r", encoding="utf-8") as f:
-                    data = json.load(f)
-            except Exception as e:
-                print(f"❌ 解析失败：{e}")
-                continue
+        try:
+            with open(file_path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+        except Exception as e:
+            msg = f"❌ 解析失败：{e}"
+            if verbose:
+                print(msg)
+            result["errors"].append((file_path, str(e)))  # unreadable files are recorded, not fatal
+            continue

-            info = data.get("info")
-            if not isinstance(info, dict):
+        info = data.get("info")
+        if not isinstance(info, dict):
+            if verbose:
                 print("⚠️ 缺少 info 字段")
-                continue
+            continue

-            modified = False
-            author_field = info.get("author")
+        modified = False
+        author_field = info.get("author")

-            if author_field is not None:
-                # 旧格式字符串处理
-                if isinstance(author_field, str):
-                    names = [name.strip() for name in author_field.split("&")]
-                    new_authors = []
-                    for name in names:
-                        new_name = author_rename.get(name, name)
-                        author_obj = {"name": new_name}
-                        if new_name in author_links:
-                            author_obj["links"] = author_links[new_name]
-                        new_authors.append(author_obj)
-                    data["info"]["authors"] = new_authors
-                    modified = True
+        if author_field is not None:
+            if isinstance(author_field, str):
+                names = [name.strip() for name in author_field.split("&")]  # legacy string joins names with "&"
+                new_authors = []
+                for name in names:
+                    new_name = author_rename.get(name, name)
+                    author_obj = {"name": new_name}
+                    if new_name in author_links:
+                        author_obj["links"] = author_links[new_name]
+                    new_authors.append(author_obj)
+                data["info"]["authors"] = new_authors  # the original "author" string is left in place
+                modified = True
+                if verbose:
                     print("✅ 替换为结构化 authors")

-                elif isinstance(author_field, list):
-                    for author_obj in author_field:
-                        if not isinstance(author_obj, dict):
-                            continue
-                        name = author_obj.get("name")
-                        if not name:
-                            continue
-                        new_name = author_rename.get(name, name)
-                        if name != new_name:
-                            author_obj["name"] = new_name
-                            modified = True
+            elif isinstance(author_field, list):
+                for author_obj in author_field:
+                    if not isinstance(author_obj, dict):
+                        continue
+                    name = author_obj.get("name")
+                    if not name:
+                        continue
+                    new_name = author_rename.get(name, name)
+                    if name != new_name:
+                        author_obj["name"] = new_name
+                        modified = True
+                        if verbose:
                             print(f"📝 重命名：{name} → {new_name}")

-                        existing_link = author_obj.pop("link", None) or author_obj.pop("url", None) or author_obj.get("links")
-                        if new_name in author_links:
-                            if author_obj.get("links") != author_links[new_name]:
-                                author_obj["links"] = author_links[new_name]
-                                modified = True
-                                print(f"🔧 更新链接：{new_name} → {author_links[new_name]}")
-                        elif "links" not in author_obj and existing_link:
-                            author_obj["links"] = existing_link
+                    existing_link = author_obj.pop("link", None) or author_obj.pop("url", None) or author_obj.get("links")  # pops legacy "link" (and "url" when "link" is empty)
+                    if new_name in author_links:
+                        if author_obj.get("links") != author_links[new_name]:
+                            author_obj["links"] = author_links[new_name]
                             modified = True
+                            if verbose:
+                                print(f"🔧 更新链接：{new_name} → {author_links[new_name]}")
+                    elif "links" not in author_obj and existing_link:
+                        author_obj["links"] = existing_link
+                        modified = True
+                        if verbose:
                             print(f"🔄 标准化已有链接字段为 links → {existing_link}")

-            else:
-                # 🔧 处理已有结构化 authors 字段，补充 links
-                authors_field = info.get("authors")
-                if isinstance(authors_field, list):
-                    for author_obj in authors_field:
-                        if not isinstance(author_obj, dict):
-                            continue
-                        name = author_obj.get("name")
-                        if not name:
-                            continue
-                        new_name = author_rename.get(name, name)
-                        if name != new_name:
-                            author_obj["name"] = new_name
-                            modified = True
+        else:
+            authors_field = info.get("authors")  # no "author" key: work on the structured "authors" list
+            if isinstance(authors_field, list):
+                for author_obj in authors_field:
+                    if not isinstance(author_obj, dict):
+                        continue
+                    name = author_obj.get("name")
+                    if not name:
+                        continue
+                    new_name = author_rename.get(name, name)
+                    if name != new_name:
+                        author_obj["name"] = new_name
+                        modified = True
+                        if verbose:
                             print(f"📝 重命名（authors）：{name} → {new_name}")

-                        existing_link = author_obj.pop("link", None) or author_obj.pop("url", None) or author_obj.get("links")
-                        if new_name in author_links:
-                            if author_obj.get("links") != author_links[new_name]:
-                                author_obj["links"] = author_links[new_name]
-                                modified = True
-                                print(f"🔧 更新链接（authors）：{new_name} → {author_links[new_name]}")
-                        elif "links" not in author_obj and existing_link:
-                            author_obj["links"] = existing_link
+                    existing_link = author_obj.pop("link", None) or author_obj.pop("url", None) or author_obj.get("links")  # pops legacy "link" (and "url" when "link" is empty)
+                    if new_name in author_links:
+                        if author_obj.get("links") != author_links[new_name]:
+                            author_obj["links"] = author_links[new_name]
                             modified = True
+                            if verbose:
+                                print(f"🔧 更新链接（authors）：{new_name} → {author_links[new_name]}")
+                    elif "links" not in author_obj and existing_link:
+                        author_obj["links"] = existing_link
+                        modified = True
+                        if verbose:
                             print(f"🔄 标准化已有链接字段为 links → {existing_link}")
-                else:
+            else:
+                if verbose:
                     print("⚠️ 缺少 author 字段，且 authors 非标准格式")

-            if modified:
-                with open(file_path, "w", encoding="utf-8") as f:
-                    json.dump(data, f, ensure_ascii=False, indent=2)
-                count_modified += 1
+        if modified:
+            with open(file_path, "w", encoding="utf-8") as f:
+                json.dump(data, f, ensure_ascii=False, indent=2)
+            result["modified_files"] += 1
+            if verbose:
                 print("✅ 写入完成")
-            else:
+        else:
+            if verbose:
                 print("⏭️ 无需修改")

-print(f"\n🎉 处理完成：共 {count_total} 个 JSON 文件，修改了 {count_modified} 个")
+    if verbose:
+        print(f"\n🎉 处理完成：共 {result['total_files']} 个 JSON 文件，修改了 {result['modified_files']} 个")
+
+    return result
+
+
+# Standalone entry point; the config is read from beside this script, as before the refactor.
+if __name__ == "__main__":
+    import sys
+    if len(sys.argv) < 2:
+        print("❌ 用法：python pathing_authors.py <JSON文件或目录路径>")
+        sys.exit(1)  # a usage error must not report success
+    process_json_authors(sys.argv[1], os.path.join(os.path.dirname(os.path.abspath(__file__)), "author_config.json"))
--- a/build/validate.py
+++ b/build/validate.py
@@ -415,7 +415,7 @@ def check_position_ids(positions):
    
    return validation_issues, corrections

-# ==================== 验证修复编码 ====================
+# ==================== 验证修复文件编码 ====================

 def detect_encoding(file_path, read_size=2048):
    try:
@@ -477,6 +477,164 @@ def scan_and_convert(path, target_extensions=None):
    else:
        print(f"❌ Path not found: {path}")

+# ==================== 验证修复作者信息 ====================
+
+def _normalize_author_entries(entries, author_rename, author_links, verbose=False, tag=""):
+    """
+    Rename authors and unify the link field of structured author entries, in place.
+
+    Args:
+        entries (list): author entries; items that are not dicts or lack a "name" are skipped
+        author_rename (dict): old author name → new author name
+        author_links (dict): author name → link stored under "links"
+        verbose (bool): whether to print each change
+        tag (str): suffix inserted into the rename / link-update log labels
+
+    Returns:
+        bool: True if an entry was renamed or had its "links" value set
+    """
+    changed = False
+    for entry in entries:
+        if not isinstance(entry, dict):
+            continue
+        name = entry.get("name")
+        if not name:
+            continue
+        new_name = author_rename.get(name, name)
+        if name != new_name:
+            entry["name"] = new_name
+            changed = True
+            if verbose:
+                print(f"📝 重命名{tag}：{name} → {new_name}")
+
+        # Pops legacy "link" (and "url" when "link" is empty); the first truthy value is the fallback.
+        existing_link = entry.pop("link", None) or entry.pop("url", None) or entry.get("links")
+        if new_name in author_links:
+            # The mapping table wins over whatever link the file carries.
+            if entry.get("links") != author_links[new_name]:
+                entry["links"] = author_links[new_name]
+                changed = True
+                if verbose:
+                    print(f"🔧 更新链接{tag}：{new_name} → {author_links[new_name]}")
+        elif "links" not in entry and existing_link:
+            entry["links"] = existing_link
+            changed = True
+            if verbose:
+                print(f"🔄 标准化已有链接字段为 links → {existing_link}")
+    return changed
+
+
+def process_json_authors(input_path, config_path="author_config.json", verbose=False):
+    """
+    Process author info in JSON files (author → authors migration, author renames, link unification).
+
+    Args:
+        input_path (str): a .json file, or a directory that is walked recursively
+        config_path (str): config file path; a relative path missing from the cwd is looked up beside this script
+        verbose (bool): whether to print detailed log messages
+
+    Returns:
+        dict: "total_files" and "modified_files" counters plus an "errors" list of (file_path, message) tuples
+
+    Raises:
+        FileNotFoundError: input_path or the config file does not exist
+        RuntimeError: the config file cannot be read or parsed
+        ValueError: input_path is neither a .json file nor a directory
+    """
+    result = {"total_files": 0, "modified_files": 0, "errors": []}
+
+    if not os.path.exists(input_path):
+        raise FileNotFoundError(f"路径不存在：{input_path}")
+    if not os.path.isabs(config_path) and not os.path.exists(config_path):
+        # main() relies on the default from any working directory; fall back to this script's folder.
+        config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), config_path)
+    if not os.path.exists(config_path):
+        raise FileNotFoundError(f"配置文件不存在：{config_path}")
+
+    # Load the rename/link mapping table.
+    try:
+        with open(config_path, "r", encoding="utf-8") as f:
+            config = json.load(f)
+    except Exception as e:
+        raise RuntimeError(f"配置文件加载失败：{e}") from e
+
+    author_rename = config.get("rename", {})
+    author_links = config.get("links", {})
+
+    # Build the list of files to process.
+    file_list = []
+    if os.path.isfile(input_path) and input_path.endswith(".json"):
+        file_list.append(input_path)
+    elif os.path.isdir(input_path):
+        for root, dirs, files in os.walk(input_path):
+            for filename in files:
+                if filename.endswith(".json"):
+                    file_list.append(os.path.join(root, filename))
+    else:
+        raise ValueError("输入路径必须是 .json 文件或目录")
+
+    for file_path in file_list:
+        result["total_files"] += 1
+        if verbose:
+            print(f"\n🔍 处理文件：{file_path}")
+
+        try:
+            with open(file_path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+        except Exception as e:
+            if verbose:
+                print(f"❌ 解析失败：{e}")
+            result["errors"].append((file_path, str(e)))
+            continue
+
+        # A top-level list or scalar has no "info"; treat it as a missing field instead of crashing.
+        info = data.get("info") if isinstance(data, dict) else None
+        if not isinstance(info, dict):
+            if verbose:
+                print("⚠️ 缺少 info 字段")
+            continue
+
+        modified = False
+        author_field = info.get("author")
+
+        if author_field is not None:
+            if isinstance(author_field, str):
+                # Legacy format: one string with names joined by "&"; the string itself is left in place.
+                new_authors = []
+                for name in (part.strip() for part in author_field.split("&")):
+                    new_name = author_rename.get(name, name)
+                    author_obj = {"name": new_name}
+                    if new_name in author_links:
+                        author_obj["links"] = author_links[new_name]
+                    new_authors.append(author_obj)
+                info["authors"] = new_authors
+                modified = True
+                if verbose:
+                    print("✅ 替换为结构化 authors")
+            elif isinstance(author_field, list):
+                modified = _normalize_author_entries(author_field, author_rename, author_links, verbose)
+        else:
+            authors_field = info.get("authors")
+            if isinstance(authors_field, list):
+                modified = _normalize_author_entries(authors_field, author_rename, author_links, verbose, "（authors）")
+            else:
+                # NOTE(review): printed even when verbose is False (the verbose gate was disabled) — confirm intended.
+                print("⚠️ 缺少 author 字段，且 authors 非标准格式")
+
+        if modified:
+            with open(file_path, "w", encoding="utf-8") as f:
+                json.dump(data, f, ensure_ascii=False, indent=2)
+            result["modified_files"] += 1
+            if verbose:
+                print("✅ 写入完成")
+        elif verbose:
+            print("⏭️ 无需修改")
+
+    if verbose:
+        print(f"\n🎉 处理完成：共 {result['total_files']} 个 JSON 文件，修改了 {result['modified_files']} 个")
+
+    return result
+
 # ==================== 主验证逻辑 ====================

 def initialize_data(data, file_path):
@@ -678,6 +836,7 @@ def main():

    if os.path.isfile(path) and path.endswith('.json'):
        scan_and_convert(path)
+        process_json_authors(path)
        # print(f"\n🔍 校验文件: {path}")
        notices = validate_file(path, auto_fix)
        if notices:
@@ -694,6 +853,7 @@ def main():
                    file_path = os.path.join(root, file)
                    print(f"\n🔍 校验文件: {file_path}")
                    scan_and_convert(file_path)
+                    process_json_authors(file_path)
                    notices = validate_file(file_path, auto_fix)
                    if notices:
                        all_notices.extend([f"{file_path}: {n}" for n in notices])