大模型能力排名

更新时间：2026-06-25

模型	总分	专业能力	高难度提示词	编程	数学	创意写作	指令遵循	长文本
claude-fable-5	1	3	2	1	1	1	1	2
claude-opus-4-6-thinking	2	1	1	3	3	2	2	1
claude-opus-4-7-thinking	3	6	4	2	5	3	3	4
claude-opus-4-6	4	2	3	5	4	7	4	3
claude-opus-4-7	5	4	5	4	7	5	6	5
muse-spark	6	30	11	13	30	11	24	29
gemini-3.1-pro-preview	7	10	8	14	10	6	9	8
gemini-3-pro	8	19	9	20	17	4	14	13
claude-opus-4-8-thinking	9	9	6	6	8	8	5	7
gpt-5.5-high	10	8	13	19	11	19	11	16
gpt-5.4-high	11	7	12	16	6	24	12	17
claude-opus-4-8	12	5	7	7	9	16	7	6
gemini-3.5-flash	13	12	23	35	2	10	25	24
gpt-5.2-chat-latest-20260210	14	26	18	21	38	41	28	31
glm-5.1	15	16	15	9	21	12	18	14
gpt-5.5	16	14	21	39	13	20	16	25
qwen3.7-max-preview	17	13	20	12	12	31	20	9
grok-4.20-beta-0309-reasoning	18	48	25	28	23	27	39	39
grok-4.20-beta1	19	51	30	34	42	14	37	41
gemini-3-flash	20	25	22	33	20	15	27	26
claude-opus-4-5-20251101-thinking-32k	21	17	14	8	24	9	8	10
gpt-5.5-instant	22	45	26	26	29	17	31	30
grok-4.20-multi-agent-beta-0309	23	34	32	27	41	22	45	44
claude-sonnet-4-6	24	15	10	10	33	28	10	11
glm-5.2 (max)	25	29	17	11	–	29	19	19
claude-opus-4-5-20251101	26	18	16	15	28	13	13	12
gpt-5.4	27	24	27	23	32	36	21	23
ernie-5.1	28	35	28	22	16	40	32	38
mimo-v2.5-pro	29	11	19	17	14	39	15	15
grok-4.1-thinking	30	55	40	45	50	47	59	58
qwen3.5-max-preview	31	22	24	29	25	21	17	22
qwen3.6-max-preview	32	23	34	32	19	37	36	27
gemini-3-flash (thinking-minimal)	33	61	41	53	36	26	43	42
kimi-k2.6	34	20	31	31	15	44	29	28
grok-4.1	35	71	44	54	70	42	58	54
deepseek-v4-pro-thinking	36	47	38	51	18	34	35	33
glm-5	37	33	37	46	55	25	40	34
deepseek-v4-pro	38	43	42	42	58	33	34	35
claude-sonnet-4-5-20250929-thinking-32k	39	21	29	18	35	23	22	18
claude-sonnet-4-5-20250929	40	32	33	24	73	18	23	21
dola-seed-2.0-pro	41	42	36	30	43	68	54	52
gpt-5.1-high	42	41	45	55	37	46	38	43
gemma-4-31b	43	38	46	48	26	52	33	36
kimi-k2.5-thinking	44	37	48	41	22	48	46	45
ernie-5.0-preview-1203	45	74	54	80	106	53	72	81
claude-opus-4-1-20250805-thinking-16k	46	36	35	25	48	30	26	20
gpt-5.3-chat-latest	47	53	47	47	79	65	53	47
mimo-v2-pro	48	28	43	40	40	49	42	37
minimax-m3	49	31	51	37	31	70	41	49
gpt-5.4-mini-high	50	39	49	49	54	75	55	61
claude-opus-4-1-20250805	51	59	39	36	64	35	30	32
ernie-5.0-0110	52	82	53	56	59	50	65	73
gemini-2.5-pro	53	64	64	89	52	32	47	46
gpt-4.5-preview-2025-02-27	54	104	88	98	103	38	49	68
qwen3.6-plus	55	49	50	52	39	64	50	50
grok-4.3	56	83	66	61	87	43	82	59
qwen3.5-397b-a17b	57	40	52	50	46	56	51	48
chatgpt-4o-latest-20250326	58	103	68	82	112	51	61	74
glm-4.7	59	79	55	63	72	66	60	51
gpt-5.1	60	68	70	77	81	59	63	66
gemma-4-26b-a4b	61	50	57	67	27	69	48	56
gpt-5.2-high	62	44	59	58	34	95	64	82
deepseek-v4-flash-thinking	63	57	67	72	60	61	56	57
longcat-flash-chat-2602-exp	64	58	60	43	61	90	85	77
qwen3-max-preview	65	54	61	65	56	83	66	67
gpt-5.2	66	60	62	64	63	91	67	75
deepseek-v4-flash	67	67	65	66	77	60	62	62
gpt-5-high	68	66	81	85	62	111	90	114
mimo-v2.5	69	46	56	57	44	80	57	53
gemini-3.1-flash-lite-preview	70	84	84	103	53	57	92	89
mimo-v2-omni	71	63	63	59	49	84	68	55
kimi-k2.5-instant	72	70	58	38	51	93	52	64
o3-2025-04-16	73	87	90	101	45	100	101	119
grok-4-1-fast-reasoning	74	90	87	94	89	55	102	97
kimi-k2-thinking-turbo	75	65	71	62	57	85	75	83
amazon-nova-experimental-chat-26-02-10	76	27	72	60	76	146	76	90
gpt-5-chat	77	78	73	93	97	89	77	78
mistral-medium-3.5	78	88	83	71	65	87	70	86
glm-4.6	79	89	86	99	83	71	83	84
deepseek-v3.2	80	75	77	81	68	73	69	71
deepseek-v3.2-exp-thinking	81	77	79	74	75	79	79	87
claude-opus-4-20250514-thinking-16k	82	80	69	44	84	45	44	40
qwen3-max-2025-09-23	83	115	75	75	67	81	81	85
qwen3-235b-a22b-instruct-2507	84	73	74	78	85	105	80	80
deepseek-v3.2-exp	85	107	76	87	88	58	78	69
deepseek-v3.2-thinking	86	72	80	73	78	88	73	70
deepseek-r1-0528	87	112	97	90	127	82	116	120
grok-4-fast-chat	88	109	98	102	82	98	107	100
ernie-5.0-preview-1022	89	96	107	136	120	63	105	102
kimi-k2-0905-preview	90	117	94	83	91	102	117	124
deepseek-v3.1-terminus-thinking	91	–	82	92	107	97	71	65
kimi-k2-0711-preview	92	121	103	95	136	116	136	134
deepseek-v3.1	93	105	101	114	93	94	98	101
qwen3.5-122b-a10b	94	85	100	100	80	119	94	106
deepseek-v3.1-thinking	95	102	93	105	95	67	74	63
minimax-m2.7	96	86	85	68	94	124	91	79
nvidia-nemotron-3-ultra-550b-a55b-nvfp4	97	56	96	88	47	120	108	95
deepseek-v3.1-terminus	98	–	111	125	128	62	114	104
amazon-nova-experimental-chat-26-01-10	99	52	78	69	102	133	100	99
mistral-large-3	100	110	102	84	116	109	97	107