Commit 7669be9

[Example] Update qwen2 7b/14b config from qwen1.5 to 2.5. (#125)
1 parent bcef97b commit 7669be9

4 files changed: +26 -24 lines changed

examples/model_config/qwen2-14b/config.ini

Lines changed: 6 additions & 4 deletions
@@ -1,11 +1,11 @@
 [qwen2]
-model_name = /data/models/Qwen1.5-14B-Chat
+model_name = /data/Qwen2.5-14B-Instruct
 head_num = 40
-kv_head_num = 40
+kv_head_num = 8
 size_per_head = 128
-inter_size = 13696
+inter_size = 13824
 max_pos_seq_len = 32768
-num_layer = 40
+num_layer = 48
 rms_norm_eps = 1e-06
 layernorm_type = pre_layernorm
 activation_type = silu
@@ -16,4 +16,6 @@ start_id = 151643
 end_id = 151645
 pad_id = 151643
 weight_data_type = fp16
+attn_params_type = GQAttnParams
+ffn_params_type = LlamaFFNParams
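The new 14B values switch the attention layout from full multi-head attention (kv_head_num = 40) to grouped-query attention (kv_head_num = 8 against head_num = 40). A minimal sanity-check sketch follows; it assumes only the standard-library configparser and the config.ini from this commit at the path used in the other diffs, so the path and field names mirror the [qwen2] section above.

# Minimal sketch (assumptions: standard-library configparser, the
# examples/model_config/qwen2-14b/config.ini from this commit on disk).
import configparser

cfg = configparser.ConfigParser()
cfg.read("examples/model_config/qwen2-14b/config.ini")
q = cfg["qwen2"]

head_num = q.getint("head_num")            # 40 query heads
kv_head_num = q.getint("kv_head_num")      # 8 key/value heads (GQA)
size_per_head = q.getint("size_per_head")  # 128

# With grouped-query attention the query heads must divide evenly
# across the key/value heads.
assert head_num % kv_head_num == 0, "head_num must be a multiple of kv_head_num"
print("query heads per KV head:", head_num // kv_head_num)  # 5
print("hidden size:", head_num * size_per_head)             # 5120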

examples/model_config/qwen2-14b/config.json

Lines changed: 6 additions & 6 deletions
@@ -8,19 +8,19 @@
   "hidden_act": "silu",
   "hidden_size": 5120,
   "initializer_range": 0.02,
-  "intermediate_size": 13696,
+  "intermediate_size": 13824,
   "max_position_embeddings": 32768,
-  "max_window_layers": 35,
+  "max_window_layers": 70,
   "model_type": "qwen2",
   "num_attention_heads": 40,
-  "num_hidden_layers": 40,
-  "num_key_value_heads": 40,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
   "rms_norm_eps": 1e-06,
   "rope_theta": 1000000.0,
-  "sliding_window": 32768,
+  "sliding_window": 131072,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.37.0",
+  "transformers_version": "4.43.1",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 152064

examples/model_config/qwen2-7b/config.ini

Lines changed: 6 additions & 6 deletions
@@ -1,17 +1,17 @@
 [qwen2]
-model_name = /data/models/Qwen1.5-7B-Chat
-head_num = 32
-kv_head_num = 32
+model_name = /data/Qwen2.5-7B-Instruct
+head_num = 28
+kv_head_num = 4
 size_per_head = 128
-inter_size = 11008
+inter_size = 18944
 max_pos_seq_len = 32768
-num_layer = 32
+num_layer = 28
 rms_norm_eps = 1e-06
 layernorm_type = pre_layernorm
 activation_type = silu
 rope_theta = 1000000.0
 has_post_decoder_layernorm = 1
-vocab_size = 151936
+vocab_size = 152064
 start_id = 151643
 end_id = 151645
 pad_id = 151643
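A practical consequence of the new 7B values is a much smaller KV cache: kv_head_num drops from 32 to 4 and num_layer from 32 to 28. A rough back-of-the-envelope sketch follows, assuming 2-byte (fp16) cache elements and the usual per-token cost of 2 * kv_head_num * size_per_head elements per layer.

# Rough KV-cache size per generated token, old vs. new 7B config.
# Assumptions: fp16 cache (2 bytes/element); per token, each layer stores
# 2 (K and V) * kv_head_num * size_per_head elements.
def kv_cache_bytes_per_token(num_layer, kv_head_num, size_per_head, bytes_per_elem=2):
    return 2 * kv_head_num * size_per_head * bytes_per_elem * num_layer

old = kv_cache_bytes_per_token(num_layer=32, kv_head_num=32, size_per_head=128)  # Qwen1.5-7B-Chat values
new = kv_cache_bytes_per_token(num_layer=28, kv_head_num=4, size_per_head=128)   # Qwen2.5-7B-Instruct values

print(f"old: {old / 1024:.0f} KiB/token")  # 512 KiB/token
print(f"new: {new / 1024:.0f} KiB/token")  # 56 KiB/token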

examples/model_config/qwen2-7b/config.json

Lines changed: 8 additions & 8 deletions
@@ -6,22 +6,22 @@
   "bos_token_id": 151643,
   "eos_token_id": 151645,
   "hidden_act": "silu",
-  "hidden_size": 4096,
+  "hidden_size": 3584,
   "initializer_range": 0.02,
-  "intermediate_size": 11008,
+  "intermediate_size": 18944,
   "max_position_embeddings": 32768,
   "max_window_layers": 28,
   "model_type": "qwen2",
-  "num_attention_heads": 32,
-  "num_hidden_layers": 32,
-  "num_key_value_heads": 32,
+  "num_attention_heads": 28,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 4,
   "rms_norm_eps": 1e-06,
   "rope_theta": 1000000.0,
-  "sliding_window": 32768,
+  "sliding_window": 131072,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.37.0",
+  "transformers_version": "4.43.1",
   "use_cache": true,
   "use_sliding_window": false,
-  "vocab_size": 151936
+  "vocab_size": 152064
 }
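In the 7B config.json, hidden_size falls to 3584 while num_attention_heads becomes 28, which keeps the per-head dimension at 128, the value the .ini states directly as size_per_head. A small sketch of that derivation, under the same path assumptions as above:

# Derive the per-head dimension from the HF-style config.json and compare
# it with the .ini's size_per_head. Paths are assumptions.
import json, configparser

with open("examples/model_config/qwen2-7b/config.json") as f:
    hf = json.load(f)

head_dim = hf["hidden_size"] // hf["num_attention_heads"]  # 3584 // 28 = 128

ini = configparser.ConfigParser()
ini.read("examples/model_config/qwen2-7b/config.ini")
assert head_dim == ini["qwen2"].getint("size_per_head")  # both 128
print("head_dim:", head_dim)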
