模型结构
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,2 +1,3 @@
|
|||||||
data/*
|
data/*
|
||||||
Qwen3-VL-2B-Instruct/*
|
Qwen3-VL-2B-Instruct/*
|
||||||
|
__pycache__
|
||||||
66
README_CN.md
66
README_CN.md
@@ -346,3 +346,69 @@ python benchmark.py \
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Qwen3VLForConditionalGeneration(
|
||||||
|
(model): Qwen3VLModel(
|
||||||
|
(visual): Qwen3VLVisionModel(
|
||||||
|
(patch_embed): Qwen3VLVisionPatchEmbed(
|
||||||
|
(proj): Conv3d(3, 1024, kernel_size=(2, 16, 16), stride=(2, 16, 16))
|
||||||
|
)
|
||||||
|
(pos_embed): Embedding(2304, 1024)
|
||||||
|
(rotary_pos_emb): Qwen3VLVisionRotaryEmbedding()
|
||||||
|
(blocks): ModuleList(
|
||||||
|
(0-23): 24 x Qwen3VLVisionBlock(
|
||||||
|
(norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
|
||||||
|
(norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
|
||||||
|
(attn): Qwen3VLVisionAttention(
|
||||||
|
(qkv): Linear(in_features=1024, out_features=3072, bias=True)
|
||||||
|
(proj): Linear(in_features=1024, out_features=1024, bias=True)
|
||||||
|
)
|
||||||
|
(mlp): Qwen3VLVisionMLP(
|
||||||
|
(linear_fc1): Linear(in_features=1024, out_features=4096, bias=True)
|
||||||
|
(linear_fc2): Linear(in_features=4096, out_features=1024, bias=True)
|
||||||
|
(act_fn): GELUTanh()
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
(merger): Qwen3VLVisionPatchMerger(
|
||||||
|
(norm): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
|
||||||
|
(linear_fc1): Linear(in_features=4096, out_features=4096, bias=True)
|
||||||
|
(act_fn): GELU(approximate='none')
|
||||||
|
(linear_fc2): Linear(in_features=4096, out_features=2048, bias=True)
|
||||||
|
)
|
||||||
|
(deepstack_merger_list): ModuleList(
|
||||||
|
(0-2): 3 x Qwen3VLVisionPatchMerger(
|
||||||
|
(norm): LayerNorm((4096,), eps=1e-06, elementwise_affine=True)
|
||||||
|
(linear_fc1): Linear(in_features=4096, out_features=4096, bias=True)
|
||||||
|
(act_fn): GELU(approximate='none')
|
||||||
|
(linear_fc2): Linear(in_features=4096, out_features=2048, bias=True)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
(language_model): Qwen3VLTextModel(
|
||||||
|
(embed_tokens): Embedding(151936, 2048)
|
||||||
|
(layers): ModuleList(
|
||||||
|
(0-27): 28 x Qwen3VLTextDecoderLayer(
|
||||||
|
(self_attn): Qwen3VLTextAttention(
|
||||||
|
(q_proj): Linear(in_features=2048, out_features=2048, bias=False)
|
||||||
|
(k_proj): Linear(in_features=2048, out_features=1024, bias=False)
|
||||||
|
(v_proj): Linear(in_features=2048, out_features=1024, bias=False)
|
||||||
|
(o_proj): Linear(in_features=2048, out_features=2048, bias=False)
|
||||||
|
(q_norm): Qwen3VLTextRMSNorm((128,), eps=1e-06)
|
||||||
|
(k_norm): Qwen3VLTextRMSNorm((128,), eps=1e-06)
|
||||||
|
)
|
||||||
|
(mlp): Qwen3VLTextMLP(
|
||||||
|
(gate_proj): Linear(in_features=2048, out_features=6144, bias=False)
|
||||||
|
(up_proj): Linear(in_features=2048, out_features=6144, bias=False)
|
||||||
|
(down_proj): Linear(in_features=6144, out_features=2048, bias=False)
|
||||||
|
(act_fn): SiLUActivation()
|
||||||
|
)
|
||||||
|
(input_layernorm): Qwen3VLTextRMSNorm((2048,), eps=1e-06)
|
||||||
|
(post_attention_layernorm): Qwen3VLTextRMSNorm((2048,), eps=1e-06)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
(norm): Qwen3VLTextRMSNorm((2048,), eps=1e-06)
|
||||||
|
(rotary_emb): Qwen3VLTextRotaryEmbedding()
|
||||||
|
)
|
||||||
|
)
|
||||||
|
(lm_head): Linear(in_features=2048, out_features=151936, bias=False)
|
||||||
|
)
|
||||||
@@ -588,7 +588,7 @@ def main():
|
|||||||
parser.add_argument("--model-path", type=str, default="./Qwen3-VL-2B-Instruct", help="Path to model weights")
|
parser.add_argument("--model-path", type=str, default="./Qwen3-VL-2B-Instruct", help="Path to model weights")
|
||||||
parser.add_argument("--dataset-path", type=str, default="./data", help="Path to validation dataset")
|
parser.add_argument("--dataset-path", type=str, default="./data", help="Path to validation dataset")
|
||||||
parser.add_argument("--output", type=str, default="result.json", help="Output JSON file path")
|
parser.add_argument("--output", type=str, default="result.json", help="Output JSON file path")
|
||||||
parser.add_argument("--num-samples", type=int, default=None, help="Number of samples to evaluate (default: all)")
|
parser.add_argument("--num-samples", type=int, default=100, help="Number of samples to evaluate (default: 100)")
|
||||||
parser.add_argument("--random-seed", type=int, default=None, help="Random seed for reproducibility")
|
parser.add_argument("--random-seed", type=int, default=None, help="Random seed for reproducibility")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|||||||
Reference in New Issue
Block a user