OpenAI API JSON格式指南与json_repair错误修复

核心参数是 `response_format={"type": "json_object"}`，其他支持JSON模式的模型也可以这样使用。下面我们以OpenAI模型为例。

指定OpenAI API返回JSON格式

基本JSON格式响应示例

  
import openai  
  
client = openai.OpenAI(api\_key="your-api-key")  
  
  
response = client.chat.completions.create(  
    model="gpt-4-turbo",  
    response\_format={"type": "json\_object"},  
    messages=[  
        {"role": "system", "content": "你是一个返回JSON格式的助手。"},  
        {"role": "user", "content": "返回包含用户名、年龄和爱好的JSON"}  
    ]  
)  
  
print(response.choices[0].message.content)  
# 输出示例:  
# {  
#   "name": "John Doe",  
#   "age": 30,  
#   "hobbies": ["reading", "hiking", "photography"]  
# }

更复杂的结构化数据请求

  
response = client.chat.completions.create(  
    model="gpt-4-turbo",  
    response\_format={"type": "json\_object"},  
    messages=[  
        {"role": "system", "content": "你是一个返回JSON格式的助手。"},  
        {"role": "user", "content": "生成5个用户的数据，包括姓名、电子邮件和订阅状态"}  
    ]  
)  
  
print(response.choices[0].message.content)  
# 输出示例:  
# {  
#   "users": [  
#     {"id": 1, "name": "Alice Smith", "email": "alice@example.com", "subscribed": true},  
#     {"id": 2, "name": "Bob Johnson", "email": "bob@example.com", "subscribed": false},  
#     {"id": 3, "name": "Carol Williams", "email": "carol@example.com", "subscribed": true},  
#     {"id": 4, "name": "David Brown", "email": "david@example.com", "subscribed": true},  
#     {"id": 5, "name": "Eve Davis", "email": "eve@example.com", "subscribed": false}  
#   ]  
# }

使用函数调用确保JSON响应

  
response = client.chat.completions.create(  
    model="gpt-4-turbo",  
    messages=[  
        {"role": "system", "content": "你是一个帮助用户的助手。"},  
        {"role": "user", "content": "分析以下文本的情感：'我今天非常开心，但天气不太好'"}  
    ],  
    tools=[{  
        "type": "function",  
        "function": {  
            "name": "analyze\_sentiment",  
            "description": "分析文本的情感",  
            "parameters": {  
                "type": "object",  
                "properties": {  
                    "text": {"type": "string", "description": "要分析的文本"},  
                    "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral", "mixed"]},  
                    "confidence": {"type": "number", "description": "情感分析的置信度"},  
                    "details": {  
                        "type": "object",  
                        "properties": {  
                            "positive\_aspects": {"type": "array", "items": {"type": "string"}},  
                            "negative\_aspects": {"type": "array", "items": {"type": "string"}}  
                        }  
                    }  
                },  
                "required": ["sentiment", "confidence"]  
            }  
        }  
    }],  
    tool\_choice={"type": "function", "function": {"name": "analyze\_sentiment"}}  
)  
  
print(response.choices[0].message.tool\_calls[0].function.arguments)  
# 输出示例:  
# {  
#   "text": "我今天非常开心，但天气不太好",  
#   "sentiment": "mixed",  
#   "confidence": 0.85,  
#   "details": {  
#     "positive\_aspects": ["今天非常开心"],  
#     "negative\_aspects": ["天气不太好"]  
#   }  
# }

处理特定场景的JSON返回格式

示例1: 中文内容的JSON格式

  
  
response = client.chat.completions.create(  
    model="gpt-4-turbo",  
    response\_format={"type": "json\_object"},  
    messages=[  
        {"role": "system", "content": "你是一个返回JSON格式的助手。"},  
        {"role": "user", "content": "返回一个包含中文句子及其英文翻译的JSON数组"}  
    ]  
)  
  
print(response.choices[0].message.content)  
# 输出示例:  
# {  
#   "translations": [  
#     {"chinese": "你好世界", "english": "Hello world"},  
#     {"chinese": "很高兴认识你", "english": "Nice to meet you"},  
#     {"chinese": "我爱学习编程", "english": "I love learning programming"}  
#   ]  
# }

示例2: 嵌套JSON结构

  
response = client.chat.completions.create(  
    model="gpt-4-turbo",  
    response\_format={"type": "json\_object"},  
    messages=[  
        {"role": "system", "content": "你是一个返回JSON格式的助手。"},  
        {"role": "user", "content": "返回一个公司结构的JSON，包含部门和员工"}  
    ]  
)  
  
print(response.choices[0].message.content)  
# 输出示例:  
# {  
#   "company": {  
#     "name": "Tech Solutions Inc.",  
#     "founded": 2010,  
#     "departments": [  
#       {  
#         "name": "Engineering",  
#         "head": "Zhang Wei",  
#         "employees": [  
#           {"id": 101, "name": "李明", "position": "Senior Developer"},  
#           {"id": 102, "name": "王芳", "position": "QA Engineer"}  
#         ]  
#       },  
#       {  
#         "name": "Marketing",  
#         "head": "Sarah Johnson",  
#         "employees": [  
#           {"id": 201, "name": "刘青", "position": "Marketing Specialist"},  
#           {"id": 202, "name": "陈晓", "position": "Content Writer"}  
#         ]  
#       }  
#     ]  
#   }  
# }

示例3: 强制模型遵循特定JSON模式

  
  
def get_structured_data(query: str, schema: dict):
    """Ask the chat model for JSON that follows ``schema`` and return its reply.

    The schema is pretty-printed into the system prompt, and the request is
    sent with ``response_format={"type": "json_object"}``.

    Args:
        query: The user request, sent as the user message.
        schema: A JSON-serializable schema description embedded in the
            system prompt.

    Returns:
        The ``content`` of the first choice of the completion, exactly as the
        API returned it (no parsing or validation is done here).
    """
    # Imported locally so this listing runs on its own: the article's
    # ``import json`` line only appears in a later section.
    import json

    schema_text = json.dumps(schema, ensure_ascii=False, indent=2)
    system_prompt = f"""
    你必须严格按照以下JSON模式返回数据:
    ```
    {schema_text}
    ```
    不要添加任何额外的字段，也不要省略任何必需的字段。
    不要在返回的JSON外包含任何其他文本、解释或注释。
    """

    # ``client`` is the module-level OpenAI client created earlier in the article.
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
    )

    return response.choices[0].message.content
  
# 定义一个特定的数据模式  
product\_schema = {  
    "type": "object",  
    "properties": {  
        "products": {  
            "type": "array",  
            "items": {  
                "type": "object",  
                "properties": {  
                    "id": {"type": "string"},  
                    "name": {"type": "string"},  
                    "price": {"type": "number"},  
                    "category": {"type": "string"},  
                    "inStock": {"type": "boolean"}  
                },  
                "required": ["id", "name", "price", "category", "inStock"]  
            }  
        }  
    },  
    "required": ["products"]  
}  
  
result = get\_structured\_data("生成3个电子产品的详细信息", product\_schema)  
print(result)  
# 输出示例:  
# {  
#   "products": [  
#     {  
#       "id": "EP001",  
#       "name": "超薄笔记本电脑",  
#       "price": 5999.99,  
#       "category": "电脑",  
#       "inStock": true  
#     },  
#     {  
#       "id": "EP002",  
#       "name": "智能手机",  
#       "price": 3999.99,  
#       "category": "手机",  
#       "inStock": true  
#     },  
#     {  
#       "id": "EP003",  
#       "name": "无线耳机",  
#       "price": 999.99,  
#       "category": "音频设备",  
#       "inStock": false  
#     }  
#   ]  
# }

使用 `json_repair` 修复JSON错误示例

当OpenAI API返回的JSON格式有问题时，可以使用 `json_repair` 库修复这些错误。从下面的示例可以看到，大部分简单的格式错误都可以直接修复，而内容严重破损、语义不明确的情况则比较难修复。以下是常见的JSON错误及其修复示例：

  
from json\_repair import repair\_json, loads  
import json

示例1: 修复单引号替代双引号的问题

  
bad\_json1 = "{'name': 'John', 'age': 30, 'city': 'New York'}"  
fixed\_json1 = repair\_json(bad\_json1)  
print("修复单引号:")  
print(f"修复前: {bad\_json1}")  
print(f"修复后: {fixed\_json1}")  
print()

示例2: 修复缺少引号的键

  
bad\_json2 = "{name: 'John', age: 30, city: 'New York'}"  
fixed\_json2 = repair\_json(bad\_json2)  
print("修复缺少引号的键:")  
print(f"修复前: {bad\_json2}")  
print(f"修复后: {fixed\_json2}")  
print()

示例3: 修复逗号问题

  
bad\_json3 = '{"name": "John", "age": 30, "city": "New York",}'  # 结尾多余的逗号  
fixed\_json3 = repair\_json(bad\_json3)  
print("修复多余的逗号:")  
print(f"修复前: {bad\_json3}")  
print(f"修复后: {fixed\_json3}")  
print()

示例4: 修复缺少大括号的问题

  
# Input with the surrounding curly braces missing entirely.
bad_json4 = '"name": "John", "age": 30, "city": "New York"'
fixed_json4 = repair_json(bad_json4)
print("修复缺少括号:")
print(f"修复前: {bad_json4}")
print(f"修复后: {fixed_json4}")
print()

（运行结果截图）这个示例直接修复失败了：`json_repair` 没有补全缺失的大括号。

示例5: 修复非标准的布尔值或空值

  
bad\_json5 = '{"name": "John", "active": True, "data": None}'  
fixed\_json5 = repair\_json(bad\_json5)  
print("修复非标准的布尔值或空值:")  
print(f"修复前: {bad\_json5}")  
print(f"修复后: {fixed\_json5}")  
print()

示例6: 修复嵌套结构中的错误

  
bad\_json6 = '{"user": {"name": "John", "contacts": {"email": "john@example.com", phone: "123-456-7890"}}}'  
fixed\_json6 = repair\_json(bad\_json6)  
print("修复嵌套结构中的错误:")  
print(f"修复前: {bad\_json6}")  
print(f"修复后: {fixed\_json6}")  
print()

示例7: 修复数组中的错误

  
bad_json7 = '{"items": [1, 2, 3,, 4, 5]}'  # the array contains a doubled comma
fixed_json7 = repair_json(bad_json7)
print("修复数组中的错误:")
print(f"修复前: {bad_json7}")
print(f"修复后: {fixed_json7}")
print()

示例8: 修复不匹配的括号

  
bad\_json8 = '{"name": "John", "items": [1, 2, 3}'  # 方括号没有闭合  
fixed\_json8 = repair\_json(bad\_json8)  
print("修复不匹配的括号:")  
print(f"修复前: {bad\_json8}")  
print(f"修复后: {fixed\_json8}")  
print()  
  
示例9: 修复中文等非ASCII字符的问题

bad\_json9 = "{'name': '张三', 'city': '北京'}"  
fixed\_json9 = repair\_json(bad\_json9, ensure\_ascii=False)  
print("修复包含中文的JSON并保留中文字符:")  
print(f"修复前: {bad\_json9}")  
print(f"修复后: {fixed\_json9}")  
print()

示例10: 直接获取Python对象而不是JSON字符串

  
bad\_json10 = "{'name': 'John', 'age': 30, 'skills': ['Python', 'JavaScript']}"  
fixed\_obj10 = loads(bad\_json10)  # 等同于 repair\_json(bad\_json10, return\_objects=True)  
print("直接获取Python对象:")  
print(f"修复前: {bad\_json10}")  
print(f"修复后(Python对象): {fixed\_obj10}")  
print(f"对象类型: {type(fixed\_obj10)}")  
print()

示例11: 处理严重破损的JSON

  
severely\_broken\_json = "{这不是有效的JSON，name: 'John', age: missing\_value}"  
try:  
    fixed\_severely\_broken = repair\_json(severely\_broken\_json)  
    print("修复严重破损的JSON:")  
    print(f"修复前: {severely\_broken\_json}")  
    print(f"修复后: {fixed\_severely\_broken}")  
except Exception as e:  
    print(f"修复失败: {e}")  
print()

（运行结果截图）这个示例其实修复失败了，主要是因为第一个字段的位置混入了一句自然语言文本，对解析的影响比较大，修复难度也比较大。

示例12: 处理包含注释的JSON (JSON标准不支持注释)

  
json\_with\_comments = """  
{  
  "name": "John", // 这是用户名  
  "age": 30, /* 这是年龄 */  
  "city": "New York"  
}  
"""  
fixed\_json\_comments = repair\_json(json\_with\_comments)  
print("修复包含注释的JSON:")  
print(f"修复前: {json\_with\_comments}")  
print(f"修复后: {fixed\_json\_comments}")

还有一个常见场景：模型返回的内容经常以 ```json 开头，也就是整段JSON被包裹在Markdown代码块里。

比如下面：

  
  
markdown\_json = """```json  
{  
  "name": "张三",  
  "age": 30,  
  "skills": ['Python', 'JavaScript', 'React'],  
  "contact": {  
    email: "zhangsan@example.com",  
    phone: "123-456-7890"  
  }  
}  
```"""

或者

  
broken\_json = """{  
  "products": [  
    {"id": 1, "name": "笔记本电脑", "price": 5999.99},  
    {"id": 2, "name": "智能手机", "price": 3999.99,},  
    {"id": 3, name: "无线耳机", "price": 999.99}  
  ],  
  "total\_items": 3,  
  "in\_stock": True  
}"""

我们可以用下面这个函数先去除 ```json 前缀和 ``` 后缀，然后再进行修复。

  
def repair_json_output(content: str) -> str:
    """Repair and normalize model output that looks like JSON.

    Content is treated as JSON when, after stripping whitespace, it starts
    with ``{`` or ``[`` or contains a ```` ```json ```` / ```` ```ts ````
    marker. A leading ```` ```json ```` / ```` ```ts ```` fence and a trailing
    ```` ``` ```` fence are removed before parsing.

    Args:
        content: String content that may contain JSON.

    Returns:
        The parsed value re-serialized with ``json.dumps(..., ensure_ascii=False)``.
        If the content does not look like JSON it is returned stripped; if
        parsing and repair both fail, the stripped, fence-free content is
        returned and a warning is logged.
    """
    # Function-scope imports keep this listing self-contained: the article
    # only does ``from json_repair import repair_json, loads``, which leaves
    # the ``json_repair`` module name unbound, and never defines a logger.
    import json
    import logging

    content = content.strip()
    looks_like_json = (
        content.startswith(("{", "["))
        or "```json" in content
        or "```ts" in content
    )
    if not looks_like_json:
        return content

    # Drop the Markdown code-fence wrapper, if present.
    for opening_fence in ("```json", "```ts"):
        content = content.removeprefix(opening_fence)
    content = content.removesuffix("```")

    try:
        try:
            # Already-valid JSON needs no repair; the standard parser suffices.
            parsed = json.loads(content)
        except json.JSONDecodeError:
            # Only malformed input needs the third-party repair library.
            import json_repair

            parsed = json_repair.loads(content)
        return json.dumps(parsed, ensure_ascii=False)
    except Exception as exc:
        # Best effort: report the failure and hand back the cleaned text.
        logging.getLogger(__name__).warning("JSON repair failed: %s", exc)
    return content

picture.image

添加微信，备注” LLM “进入大模型技术交流群