OpenAI API JSON格式指南与json_repair错误修复

技术

核心参数是 response_format={"type": "json_object"}。其他支持 JSON 输出的模型也可以这样使用,下面我们以 OpenAI 模型为例。

指定OpenAI API返回JSON格式

基本JSON格式响应示例

  
import openai

# Placeholder key: replace with a real API key before running.
client = openai.OpenAI(api_key="your-api-key")

# response_format={"type": "json_object"} is the parameter this guide relies on
# to make the model reply with a JSON document.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    response_format={"type": "json_object"},
    messages=[
        {"role": "system", "content": "你是一个返回JSON格式的助手。"},
        {"role": "user", "content": "返回包含用户名、年龄和爱好的JSON"},
    ],
)

print(response.choices[0].message.content)
# Example output:
# {
#   "name": "John Doe",
#   "age": 30,
#   "hobbies": ["reading", "hiking", "photography"]
# }

更复杂的结构化数据请求

  
# Request a larger structured payload (a list of users) in JSON mode.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    response_format={"type": "json_object"},
    messages=[
        {"role": "system", "content": "你是一个返回JSON格式的助手。"},
        {"role": "user", "content": "生成5个用户的数据,包括姓名、电子邮件和订阅状态"},
    ],
)

print(response.choices[0].message.content)
# Example output:
# {
#   "users": [
#     {"id": 1, "name": "Alice Smith", "email": "alice@example.com", "subscribed": true},
#     {"id": 2, "name": "Bob Johnson", "email": "bob@example.com", "subscribed": false},
#     {"id": 3, "name": "Carol Williams", "email": "carol@example.com", "subscribed": true},
#     {"id": 4, "name": "David Brown", "email": "david@example.com", "subscribed": true},
#     {"id": 5, "name": "Eve Davis", "email": "eve@example.com", "subscribed": false}
#   ]
# }

使用函数调用确保JSON响应

  
# Use a tool (function) definition plus tool_choice so the reply arrives as
# JSON arguments for the named function instead of free-form message content.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[
        {"role": "system", "content": "你是一个帮助用户的助手。"},
        {"role": "user", "content": "分析以下文本的情感:'我今天非常开心,但天气不太好'"},
    ],
    tools=[{
        "type": "function",
        "function": {
            "name": "analyze_sentiment",
            "description": "分析文本的情感",
            "parameters": {
                "type": "object",
                "properties": {
                    "text": {"type": "string", "description": "要分析的文本"},
                    "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral", "mixed"]},
                    "confidence": {"type": "number", "description": "情感分析的置信度"},
                    "details": {
                        "type": "object",
                        "properties": {
                            "positive_aspects": {"type": "array", "items": {"type": "string"}},
                            "negative_aspects": {"type": "array", "items": {"type": "string"}},
                        },
                    },
                },
                "required": ["sentiment", "confidence"],
            },
        },
    }],
    # Name the function explicitly in tool_choice so it is the one called.
    tool_choice={"type": "function", "function": {"name": "analyze_sentiment"}},
)

# The arguments of the first tool call hold the JSON string.
print(response.choices[0].message.tool_calls[0].function.arguments)
# Example output:
# {
#   "text": "我今天非常开心,但天气不太好",
#   "sentiment": "mixed",
#   "confidence": 0.85,
#   "details": {
#     "positive_aspects": ["今天非常开心"],
#     "negative_aspects": ["天气不太好"]
#   }
# }

处理特定场景的JSON返回格式

示例1: 中文内容的JSON格式

  
  
# JSON mode with Chinese text inside the returned values.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    response_format={"type": "json_object"},
    messages=[
        {"role": "system", "content": "你是一个返回JSON格式的助手。"},
        {"role": "user", "content": "返回一个包含中文句子及其英文翻译的JSON数组"},
    ],
)

print(response.choices[0].message.content)
# Example output:
# {
#   "translations": [
#     {"chinese": "你好世界", "english": "Hello world"},
#     {"chinese": "很高兴认识你", "english": "Nice to meet you"},
#     {"chinese": "我爱学习编程", "english": "I love learning programming"}
#   ]
# }

示例2: 嵌套JSON结构

  
# JSON mode for a nested document (company -> departments -> employees).
response = client.chat.completions.create(
    model="gpt-4-turbo",
    response_format={"type": "json_object"},
    messages=[
        {"role": "system", "content": "你是一个返回JSON格式的助手。"},
        {"role": "user", "content": "返回一个公司结构的JSON,包含部门和员工"},
    ],
)

print(response.choices[0].message.content)
# Example output:
# {
#   "company": {
#     "name": "Tech Solutions Inc.",
#     "founded": 2010,
#     "departments": [
#       {
#         "name": "Engineering",
#         "head": "Zhang Wei",
#         "employees": [
#           {"id": 101, "name": "李明", "position": "Senior Developer"},
#           {"id": 102, "name": "王芳", "position": "QA Engineer"}
#         ]
#       },
#       {
#         "name": "Marketing",
#         "head": "Sarah Johnson",
#         "employees": [
#           {"id": 201, "name": "刘青", "position": "Marketing Specialist"},
#           {"id": 202, "name": "陈晓", "position": "Content Writer"}
#         ]
#       }
#     ]
#   }
# }

示例3: 强制模型遵循特定JSON模式

  
  
def get_structured_data(query, schema):
    """Ask the model for JSON that follows *schema* and return the raw reply.

    The schema is serialised into the system prompt, and the request is sent
    in JSON mode through the module-level ``client``.

    Args:
        query: Text sent as the user message.
        schema: JSON-serialisable schema object embedded in the system prompt.

    Returns:
        The message content of the first choice, exactly as the API returned it.
    """
    # Imported locally: this function is defined and called before the file's
    # module-level ``import json`` statement is reached.
    import json

    system_prompt = f"""
    你必须严格按照以下JSON模式返回数据:
    ```
    {json.dumps(schema, ensure_ascii=False, indent=2)}
    ```
    不要添加任何额外的字段,也不要省略任何必需的字段。
    不要在返回的JSON外包含任何其他文本、解释或注释。
    """

    response = client.chat.completions.create(
        model="gpt-4-turbo",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
    )

    return response.choices[0].message.content
  
# Schema describing the exact shape the reply must have.
product_schema = {
    "type": "object",
    "properties": {
        "products": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "id": {"type": "string"},
                    "name": {"type": "string"},
                    "price": {"type": "number"},
                    "category": {"type": "string"},
                    "inStock": {"type": "boolean"},
                },
                "required": ["id", "name", "price", "category", "inStock"],
            },
        },
    },
    "required": ["products"],
}

result = get_structured_data("生成3个电子产品的详细信息", product_schema)
print(result)
# Example output:
# {
#   "products": [
#     {
#       "id": "EP001",
#       "name": "超薄笔记本电脑",
#       "price": 5999.99,
#       "category": "电脑",
#       "inStock": true
#     },
#     {
#       "id": "EP002",
#       "name": "智能手机",
#       "price": 3999.99,
#       "category": "手机",
#       "inStock": true
#     },
#     {
#       "id": "EP003",
#       "name": "无线耳机",
#       "price": 999.99,
#       "category": "音频设备",
#       "inStock": false
#     }
#   ]
# }

使用 **json_repair** 修复JSON错误示例

当OpenAI API返回的JSON格式有问题时,可以使用json_repair库修复这些错误。可以看到大部分简单的错误示例是可以直接修复的,有些语义难度大的确实比较难修复。以下是常见的JSON错误及其修复示例:

  
import json
import logging
import re

from json_repair import repair_json, loads
  • 示例1: 修复单引号替代双引号的问题
  
# Example 1: single quotes used where JSON requires double quotes.
bad_json1 = "{'name': 'John', 'age': 30, 'city': 'New York'}"
fixed_json1 = repair_json(bad_json1)
print("修复单引号:")
print(f"修复前: {bad_json1}")
print(f"修复后: {fixed_json1}")
print()
  • 示例2: 修复缺少引号的键
  
# Example 2: object keys written without any quotes.
bad_json2 = "{name: 'John', age: 30, city: 'New York'}"
fixed_json2 = repair_json(bad_json2)
print("修复缺少引号的键:")
print(f"修复前: {bad_json2}")
print(f"修复后: {fixed_json2}")
print()
  • 示例3: 修复逗号问题
  
# Example 3: trailing comma after the last member.
bad_json3 = '{"name": "John", "age": 30, "city": "New York",}'
fixed_json3 = repair_json(bad_json3)
print("修复多余的逗号:")
print(f"修复前: {bad_json3}")
print(f"修复后: {fixed_json3}")
print()
  • 示例4: 修复缺少大括号的问题
  
# Example 4: key/value pairs with the enclosing braces missing.
# NOTE: the surrounding article reports that this input is NOT repaired
# successfully (the braces are not restored).
bad_json4 = '"name": "John", "age": 30, "city": "New York"'
fixed_json4 = repair_json(bad_json4)
print("修复缺少括号:")
print(f"修复前: {bad_json4}")
print(f"修复后: {fixed_json4}")
print()

[图片] 这个示例直接修复失败了,没有还原出外层的大括号。

  • 示例5: 修复非标准的布尔值或空值
  
# Example 5: Python-style True / None instead of JSON true / null.
bad_json5 = '{"name": "John", "active": True, "data": None}'
fixed_json5 = repair_json(bad_json5)
print("修复非标准的布尔值或空值:")
print(f"修复前: {bad_json5}")
print(f"修复后: {fixed_json5}")
print()
  • 示例6: 修复嵌套结构中的错误
  
# Example 6: an unquoted key (phone) buried inside a nested object.
bad_json6 = '{"user": {"name": "John", "contacts": {"email": "john@example.com", phone: "123-456-7890"}}}'
fixed_json6 = repair_json(bad_json6)
print("修复嵌套结构中的错误:")
print(f"修复前: {bad_json6}")
print(f"修复后: {fixed_json6}")
print()
  • 示例7: 修复数组中的错误
  
# Example 7: an extra comma inside an array.
bad_json7 = '{"items": [1, 2, 3,, 4, 5]}'
fixed_json7 = repair_json(bad_json7)
print("修复数组中的错误:")
print(f"修复前: {bad_json7}")
print(f"修复后: {fixed_json7}")
print()
  • 示例8: 修复不匹配的括号
  
# Example 8: the square bracket of the array is never closed.
bad_json8 = '{"name": "John", "items": [1, 2, 3}'
fixed_json8 = repair_json(bad_json8)
print("修复不匹配的括号:")
print(f"修复前: {bad_json8}")
print(f"修复后: {fixed_json8}")
print()
  
  • 示例9: 修复中文等非ASCII字符的问题

# Example 9: single-quoted JSON containing Chinese text. ensure_ascii=False is
# passed so the Chinese characters are kept in the repaired output.
bad_json9 = "{'name': '张三', 'city': '北京'}"
fixed_json9 = repair_json(bad_json9, ensure_ascii=False)
print("修复包含中文的JSON并保留中文字符:")
print(f"修复前: {bad_json9}")
print(f"修复后: {fixed_json9}")
print()
  • 示例10: 直接获取Python对象而不是JSON字符串
  
# Example 10: get a Python object back instead of a repaired JSON string.
bad_json10 = "{'name': 'John', 'age': 30, 'skills': ['Python', 'JavaScript']}"
fixed_obj10 = loads(bad_json10)  # equivalent to repair_json(bad_json10, return_objects=True)
print("直接获取Python对象:")
print(f"修复前: {bad_json10}")
print(f"修复后(Python对象): {fixed_obj10}")
print(f"对象类型: {type(fixed_obj10)}")
print()
  • 示例11: 处理严重破损的JSON
  
# Example 11: heavily damaged input that starts with a non-JSON sentence.
# NOTE: the surrounding article reports that this repair does not succeed.
severely_broken_json = "{这不是有效的JSON,name: 'John', age: missing_value}"
try:
    fixed_severely_broken = repair_json(severely_broken_json)
    print("修复严重破损的JSON:")
    print(f"修复前: {severely_broken_json}")
    print(f"修复后: {fixed_severely_broken}")
except Exception as e:
    # Demo script: report the failure and carry on with the next example.
    print(f"修复失败: {e}")
print()

[图片] 这个其实修复失败了,主要是因为开头混入了一句非JSON的文字,对解析的干扰比较大,修复难度也比较大。

  • 示例12: 处理包含注释的JSON (JSON标准不支持注释)
  
# Example 12: JSON containing // and /* */ comments, which the JSON standard
# does not allow.
json_with_comments = """
{
  "name": "John", // 这是用户名
  "age": 30, /* 这是年龄 */
  "city": "New York"
}
"""
fixed_json_comments = repair_json(json_with_comments)
print("修复包含注释的JSON:")
print(f"修复前: {json_with_comments}")
print(f"修复后: {fixed_json_comments}")

还有一个常见场景:模型返回的内容经常以 ```json 代码块标记开头。

比如下面:

  
  
# Sample model output wrapped in a ```json fence. The payload is deliberately
# malformed: single-quoted array items and unquoted keys.
markdown_json = """```json
{
  "name": "张三",
  "age": 30,
  "skills": ['Python', 'JavaScript', 'React'],
  "contact": {
    email: "zhangsan@example.com",
    phone: "123-456-7890"
  }
}
```"""
  

或者

  
# Sample unfenced output that is deliberately malformed: a trailing comma, an
# unquoted key and a Python-style True.
broken_json = """{
  "products": [
    {"id": 1, "name": "笔记本电脑", "price": 5999.99},
    {"id": 2, "name": "智能手机", "price": 3999.99,},
    {"id": 3, name: "无线耳机", "price": 999.99}
  ],
  "total_items": 3,
  "in_stock": True
}"""

我们可以用下面一个函数来去除前缀和后缀,然后再去修复

  
logger = logging.getLogger(__name__)

# Matches the first ```json / ```ts fenced block and captures its payload.
_FENCED_BLOCK = re.compile(r"```(?:json|ts)\s*(.*?)\s*```", re.DOTALL)


def _strip_code_fence(content: str) -> str:
    """Return the payload of a ```json / ```ts fence found in *content*."""
    match = _FENCED_BLOCK.search(content)
    if match:
        return match.group(1)
    # No complete fence (e.g. the closing marker is missing): trim whatever
    # markers sit at the very start or end of the text.
    for marker in ("```json", "```ts"):
        content = content.removeprefix(marker)
    return content.removesuffix("```").strip()


def repair_json_output(content: str) -> str:
    """Repair and normalize JSON output.

    Text is treated as JSON when, after stripping whitespace, it starts with
    ``{`` or ``[``, or when it contains a ```json / ```ts code fence. A fence
    is unwrapped wherever it appears, not only at the very start of the text.

    Args:
        content (str): String content that may contain JSON.

    Returns:
        str: The repaired JSON re-serialised with ``ensure_ascii=False``.
        Text that is not treated as JSON is returned stripped but otherwise
        unchanged. If the repair step fails, a warning is logged and the
        fence-stripped text is returned.
    """
    content = content.strip()
    looks_like_json = content.startswith(("{", "["))
    has_fence = "```json" in content or "```ts" in content
    if not (looks_like_json or has_fence):
        return content

    # Only unwrap a fence when the text is not already a bare JSON document,
    # so fence markers inside JSON string values are left alone.
    if not looks_like_json:
        content = _strip_code_fence(content)

    try:
        # Imported here because the file only imports names *from*
        # json_repair, never the module itself. A failed import is handled
        # like any other repair failure.
        import json_repair

        repaired_content = json_repair.loads(content)
        return json.dumps(repaired_content, ensure_ascii=False)
    except Exception as e:
        # Best effort: keep the caller running and hand back what we have.
        logger.warning("JSON repair failed: %s", e)
    return content
  

picture.image

添加微信,备注“LLM”进入大模型技术交流群

picture.image

picture.image

如果你觉得这篇文章对你有帮助,别忘了点个赞、送个喜欢

/ 作者:致Great

/ 作者:欢迎转载,标注来源即可

0
0
0
0
关于作者
关于作者

文章

0

获赞

0

收藏

0

相关资源
大规模高性能计算集群优化实践
随着机器学习的发展,数据量和训练模型都有越来越大的趋势,这对基础设施有了更高的要求,包括硬件、网络架构等。本次分享主要介绍火山引擎支撑大规模高性能计算集群的架构和优化实践。
相关产品
评论
未登录
看完啦,登录分享一下感受吧~
暂无评论