大模型能力排名

claude-opus-4-6-thinking
1
2
1
1
1
1
1
1
claude-opus-4-6
2
1
2
2
3
5
2
2
gemini-3.1-pro-preview
3
3
3
3
4
2
3
3
grok-4.20-beta1
4
20
5
7
17
4
9
12
gemini-3-pro
5
8
6
10
6
3
8
7
gpt-5.4-high
6
4
4
4
2
7
5
8
gpt-5.2-chat-latest-20260210
7
11
7
6
11
14
10
13
grok-4.20-beta-0309-reasoning
8
15
10
11
18
8
16
18
gemini-3-flash
9
14
12
18
9
9
13
14
claude-opus-4-5-20251101-thinking-32k
10
7
9
5
10
6
4
5
grok-4.1-thinking
11
24
15
23
30
22
31
29
claude-opus-4-5-20251101
12
6
11
12
12
10
7
6
claude-sonnet-4-6
13
9
8
8
8
16
6
4
qwen3.5-max-preview
14
12
18
20
5
11
18
20
gpt-5.3-chat-latest
15
27
13
13
26
35
22
16
gemini-3-flash (thinking-minimal)
16
26
21
25
15
13
20
19
gpt-5.4
17
5
17
24
14
33
11
15
dola-seed-2.0-preview
18
19
16
19
19
43
29
30
grok-4.1
19
38
23
30
44
23
28
28
gpt-5.1-high
20
23
25
33
21
24
19
23
glm-5
21
17
24
32
20
18
21
22
kimi-k2.5-thinking
22
18
26
17
7
29
24
25
claude-sonnet-4-5-20250929
23
21
19
16
49
12
15
11
claude-sonnet-4-5-20250929-thinking-32k
24
13
14
9
16
20
12
10
ernie-5.0-0110
25
45
27
29
22
27
30
37
qwen3.5-397b-a17b
26
34
29
31
24
26
32
32
ernie-5.0-preview-1203
27
47
32
45
75
30
41
48
claude-opus-4-1-20250805-thinking-16k
28
22
20
15
29
17
14
9
gemini-2.5-pro
29
30
35
52
27
15
23
24
claude-opus-4-1-20250805
30
32
22
21
36
19
17
17
mimo-v2-pro
31
10
28
14
23
31
36
26
gpt-4.5-preview-2025-02-27
32
69
55
64
70
21
26
40
chatgpt-4o-latest-20250326
33
68
37
50
79
28
37
44
glm-4.7
34
50
33
35
38
41
35
27
gpt-5.2-high
35
25
31
27
13
55
34
42
gpt-5.2
36
33
30
26
28
52
33
31
gpt-5.1
37
37
38
43
48
39
39
39
gemini-3.1-flash-lite-preview
38
72
52
68
53
34
58
50
qwen3-max-preview
39
28
36
37
32
54
38
36
gpt-5-high
40
35
46
53
34
80
57
79
kimi-k2.5-instant
41
36
34
22
31
60
27
33
o3-2025-04-16
42
55
57
66
25
66
69
85
grok-4-1-fast-reasoning
43
59
54
60
54
38
76
65
kimi-k2-thinking-turbo
44
39
40
36
33
56
44
51
amazon-nova-experimental-chat-26-02-10
45
16
41
34
35
114
40
56
gpt-5-chat
46
51
44
59
63
62
50
47
glm-4.6
47
56
53
63
46
44
49
52
deepseek-v3.2-exp-thinking
48
46
48
42
39
51
48
55
deepseek-v3.2
49
52
47
51
42
47
42
43
qwen3-max-2025-09-23
50
79
42
41
37
50
51
53
claude-opus-4-20250514-thinking-16k
51
48
39
28
56
25
25
21
deepseek-v3.2-exp
52
77
43
56
58
37
47
41
qwen3-235b-a22b-instruct-2507
53
43
45
47
47
70
52
49
deepseek-v3.2-thinking
54
49
51
46
50
58
45
46
deepseek-r1-0528
55
80
63
57
90
59
83
84
grok-4-fast-chat
56
76
65
67
45
64
72
66
ernie-5.0-preview-1022
57
60
74
99
86
40
75
70
deepseek-v3.1
58
70
64
79
57
61
66
67
kimi-k2-0905-preview
59
82
60
54
60
71
87
92
qwen3.5-122b-a10b
60
41
72
75
43
86
73
69
kimi-k2-0711-preview
61
86
69
65
104
85
101
100
deepseek-v3.1-thinking
62
67
58
69
61
42
43
35
deepseek-v3.1-terminus-thinking
63
50
61
67
69
46
38
mistral-large-3
64
78
67
48
83
76
64
76
deepseek-v3.1-terminus
65
77
93
97
36
82
73
qwen3-vl-235b-a22b-instruct
66
44
56
55
66
94
53
63
amazon-nova-experimental-chat-26-01-10
67
29
49
38
71
103
71
71
gpt-4.1-2025-04-14
68
94
70
72
117
45
70
64
claude-opus-4-20250514
69
62
61
58
85
32
55
34
grok-3-preview-02-24
70
91
75
83
116
46
61
57
gemini-2.5-flash
71
71
84
112
72
49
65
68
glm-4.5
72
58
66
73
64
79
62
74
grok-4-0709
73
66
83
95
40
48
77
75
mistral-medium-2508
74
87
73
74
87
74
78
81
minimax-m2.7
75
40
62
62
80
101
67
58
claude-haiku-4-5-20251001
76
53
59
39
99
72
56
54
qwen3.5-27b
77
42
78
96
41
98
74
62
minimax-m2.5
78
54
68
49
52
84
59
61
gemini-2.5-flash-preview-09-2025
79
57
82
108
65
65
68
72
grok-4-fast-reasoning
80
83
94
94
68
68
86
77
qwen3-235b-a22b-no-thinking
81
107
80
81
94
87
95
78
o1-2024-12-17
82
100
92
104
73
73
63
80
qwen3-next-80b-a3b-instruct
83
104
81
80
51
136
97
102
qwen3.5-flash
84
74
86
88
59
108
102
95
qwen3.5-35b-a3b
85
64
89
77
95
102
81
88
longcat-flash-chat
86
75
76
40
55
126
85
108
qwen3-235b-a22b-thinking-2507
87
31
88
86
82
78
88
93
claude-sonnet-4-20250514-thinking-32k
88
63
71
44
84
53
54
45
deepseek-r1
89
101
90
85
69
82
80
96
hunyuan-vision-1.5-thinking
90
79
90
77
79
94
qwen3-vl-235b-a22b-thinking
91
61
85
70
74
116
91
87
amazon-nova-experimental-chat-12-10
92
95
87
91
81
112
90
106
deepseek-v3-0324
93
106
99
110
118
63
100
98
mai-1-preview
94
90
103
105
98
96
105
101
mimo-v2-flash (non-thinking)
95
88
97
84
120
93
93
89
o4-mini-2025-04-16
96
96
102
106
62
117
113
127
gpt-5-mini-high
97
92
105
107
76
129
104
125
claude-sonnet-4-20250514
98
102
91
78
105
67
84
60
step-3.5-flash
99
84
101
82
91
115
94
90
o1-preview
100
118
112
118
106
91
99
111
mimo-v2-flash (thinking)
101
81
96
102
115
120
98
91
qwen3-coder-480b-a35b-instruct
102
117
95
71
114
90
92
86
hunyuan-t1-20250711
103
93
108
130
78
75
103
103
claude-3-7-sonnet-20250219-thinking-32k
104
89
93
76
109
57
60
59
mistral-medium-2505
105
116
107
103
138
92
116
104
minimax-m2.1-preview
106
65
98
89
96
107
89
82
hunyuan-turbos-20250416
107
137
114
135
139
89
123
117
qwen3-30b-a3b-instruct-2507
108
108
100
87
110
134
111
110
gpt-4.1-mini-2025-04-14
109
115
104
100
136
106
106
105
gemini-2.5-flash-lite-preview-09-2025-no-thinking
110
113
117
132
125
95
115
107
glm-4.6v
111
122
116
110
108
112
trinity-large
112
73
109
92
121
99
107
97
qwen3-235b-a22b
113
114
115
98
93
131
120
116
qwen2.5-max
114
123
119
129
124
100
121
109
gemini-2.5-flash-lite-preview-06-17-thinking
115
122
124
150
127
83
109
115
glm-4.5-air
116
109
116
111
101
127
118
113
claude-3-5-sonnet-20241022
117
126
110
101
140
88
110
99
claude-3-7-sonnet-20250219
118
111
111
109
131
81
96
83
qwen3-next-80b-a3b-thinking
119
112
120
115
100
132
119
126
glm-4.7-flash
120
110
118
113
122
145
125
121
amazon-nova-experimental-chat-11-10
121
85
113
114
89
152
112
119
gemma-3-27b-it
122
145
139
176
150
105
132
130