双卡4090上Qwen3-32B-AWQ的全科高考成绩 - 文章 - 开发者社区

上篇文章《单卡4090上DeepSeek-R1-0528-Qwen3-8B全科高考成绩》测试了由最新的 R1-0528 蒸馏得到的 Qwen3-8B 模型（DeepSeek-R1-0528-Qwen3-8B）的全科高考成绩，结果不太理想。于是抽空又测了一下 Qwen3-32B-AWQ 的全科高考成绩。为了加快速度，选择了双卡 4090 部署。vLLM 启动命令如下：

  
vllm serve /models/qwen/Qwen3-32B-AWQ --port 7869 \  
  --served-model-name qwen3 \  
  --tensor-parallel-size 2 \  
  --gpu-memory-utilization 0.7 \  
  --max-model-len 16584 \  
  --max-num-batched-tokens 16584 \  
  --dtype auto \  
  --enable-chunked-prefill \  
  --trust-remote-code \  
  --enable-auto-tool-choice \  
  --tool-call-parser hermes \  
  --enable-reasoning \  
  --reasoning-parser deepseek_r1 \  
  --api-key sk-xxx

具体测试步骤参见《如何用高考数据集来评测大模型》。

Qwen3-32B-AWQ

答卷客观题分数汇总 json：

  
{  
    "model_name": "qwen3",  
    "total_score": 8421.0,  
    "correct_score": 8137.5,  
    "question_num": 2614.0,  
    "scoring_rate": 0.966,  
    "subject": {  
        "English": {  
            "total_score": 2047.0,  
            "correct_score": 1972.5,  
            "scoring_rate": 0.964,  
            "question_num": 1213.0,  
            "type": {  
                "2010-2013_English_MCQs": {  
                    "total_score": 99.0,  
                    "correct_score": 99.0,  
                    "question_num": 99.0,  
                    "scoring_rate": 1.0  
                },  
                "2010-2022_English_Fill_in_Blanks": {  
                    "total_score": 840.0,  
                    "correct_score": 817.5,  
                    "question_num": 560.0,  
                    "scoring_rate": 0.973  
                },  
                "2012-2022_English_Cloze_Test": {  
                    "total_score": 220.0,  
                    "correct_score": 208.0,  
                    "question_num": 110.0,  
                    "scoring_rate": 0.945  
                },  
                "2010-2022_English_Reading_Comp": {  
                    "total_score": 888.0,  
                    "correct_score": 848.0,  
                    "question_num": 444.0,  
                    "scoring_rate": 0.955  
                }  
            }  
        },  
        "Math": {  
            "total_score": 1940.0,  
            "correct_score": 1940.0,  
            "scoring_rate": 1.0,  
            "question_num": 388.0,  
            "type": {  
                "2010-2022_Math_I_MCQs": {  
                    "total_score": 960.0,  
                    "correct_score": 960.0,  
                    "question_num": 192.0,  
                    "scoring_rate": 1.0  
                },  
                "2010-2022_Math_II_MCQs": {  
                    "total_score": 980.0,  
                    "correct_score": 980.0,  
                    "question_num": 196.0,  
                    "scoring_rate": 1.0  
                }  
            }  
        },  
        "Chinese": {  
            "total_score": 384.0,  
            "correct_score": 315.0,  
            "scoring_rate": 0.82,  
            "question_num": 128.0,  
            "type": {  
                "2010-2022_Chinese_Modern_Lit": {  
                    "total_score": 189.0,  
                    "correct_score": 150.0,  
                    "question_num": 63.0,  
                    "scoring_rate": 0.794  
                },  
                "2010-2022_Chinese_Lang_and_Usage_MCQs": {  
                    "total_score": 195.0,  
                    "correct_score": 165.0,  
                    "question_num": 65.0,  
                    "scoring_rate": 0.846  
                }  
            }  
        },  
        "Physics": {  
            "total_score": 312.0,  
            "correct_score": 312.0,  
            "scoring_rate": 1.0,  
            "question_num": 52.0,  
            "type": {  
                "2010-2022_Physics_MCQs": {  
                    "total_score": 312.0,  
                    "correct_score": 312.0,  
                    "question_num": 52.0,  
                    "scoring_rate": 1.0  
                }  
            }  
        },  
        "Chemistry": {  
            "total_score": 462.0,  
            "correct_score": 438.0,  
            "scoring_rate": 0.948,  
            "question_num": 77.0,  
            "type": {  
                "2010-2022_Chemistry_MCQs": {  
                    "total_score": 462.0,  
                    "correct_score": 438.0,  
                    "question_num": 77.0,  
                    "scoring_rate": 0.948  
                }  
            }  
        },  
        "Biology": {  
            "total_score": 756.0,  
            "correct_score": 756.0,  
            "scoring_rate": 1.0,  
            "question_num": 126.0,  
            "type": {  
                "2010-2022_Biology_MCQs": {  
                    "total_score": 756.0,  
                    "correct_score": 756.0,  
                    "question_num": 126.0,  
                    "scoring_rate": 1.0  
                }  
            }  
        },  
        "History": {  
            "total_score": 1088.0,  
            "correct_score": 1032.0,  
            "scoring_rate": 0.949,  
            "question_num": 272.0,  
            "type": {  
                "2010-2022_History_MCQs": {  
                    "total_score": 1088.0,  
                    "correct_score": 1032.0,  
                    "question_num": 272.0,  
                    "scoring_rate": 0.949  
                }  
            }  
        },  
        "Geography": {  
            "total_score": 356.0,  
            "correct_score": 328.0,  
            "scoring_rate": 0.921,  
            "question_num": 89.0,  
            "type": {  
                "2010-2022_Geography_MCQs": {  
                    "total_score": 356.0,  
                    "correct_score": 328.0,  
                    "question_num": 89.0,  
                    "scoring_rate": 0.921  
                }  
            }  
        },  
        "Politics": {  
            "total_score": 1076.0,  
            "correct_score": 1044.0,  
            "scoring_rate": 0.97,  
            "question_num": 269.0,  
            "type": {  
                "2010-2022_Political_Science_MCQs": {  
                    "total_score": 1076.0,  
                    "correct_score": 1044.0,  
                    "question_num": 269.0,  
                    "scoring_rate": 0.97  
                }  
            }  
        }  
    }  
}

整理成绩表格如下：(年份未标注的默认为 2010-2022年)

picture.image

可以看到，数学、物理、生物三个科目都满分了，其余科目也大多接近满分。最差的竟然是语文，得分率只有 82%。这是汉语真的太难了，还是训练语料里中文占比太少了……

模型: Qwen3/Qwen3-32B-AWQ 高考客观题总分：

总分满分: 8421.0
总得分: 8137.5
总题目数: 2614.0
总得分率: 96.6%

这说明 32B 模型的实用性还是远远高于 8B 模型的。单纯依靠蒸馏优化，很难补上参数规模限制下的模型基础能力短板。

最后祝明天全国的高三孩子们高考顺利，成绩超过大模型！