ai学习之在云端训练一个模型
平台:魔搭(ModelScope)
https://www.modelscope.cn/
在上面创建一个notebook
配置环境
pip install "transformers>=4.57" "qwen_vl_utils>=0.0.14"
pip install "ms-swift>=3.9.1"
pip install modelscope
下载模型
modelscope download --model Qwen/Qwen3-VL-2B-Instruct --local_dir /mnt/workspace/models/Qwen/Qwen3-VL-2B-Instruct
上传数据集
from modelscope.hub.api import HubApi

# 1. 登录
api = HubApi()
api.login('命令牌')

# 2. 上传数据集
# repo_id: 你的用户名/数据集仓库名
# folder_path: 本地包含图片、json、metadata.jsonl 的文件夹路径(注意这里参数名也变了)
# repo_type: 必须指定为 'dataset',否则默认会上传为模型
api.upload_folder(
    repo_id='BECAUSEACC/rock',
    folder_path='./',
    repo_type='dataset',
    commit_message='upload dataset folder to repo'
)
notebook上下载数据集
modelscope download --dataset BECAUSEACC/rock --local_dir ./rock_data
开始训练
CUDA_VISIBLE_DEVICES=0 swift sft \
    --model "/mnt/workspace/models/Qwen/Qwen3-VL-2B-Instruct" \
    --train_type lora \
    --dataset './data_swift/train_messages.jsonl' \
    --val_dataset './data_swift/val_messages.jsonl' \
    --torch_dtype bfloat16 \
    --num_train_epochs 3 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --learning_rate 5e-5 \
    --lora_rank 16 \
    --lora_alpha 32 \
    --target_modules q_proj k_proj v_proj o_proj gate_proj up_proj down_proj \
    --gradient_accumulation_steps 8 \
    --eval_steps 300 \
    --save_steps 300 \
    --save_total_limit 2 \
    --logging_steps 20 \
    --output_dir ./output_qwen_vl_lora_v2 \
    --gradient_checkpointing true \
    --quant_method bnb \
    --quant_bits 4
训练完成后打包下载
# 注意:--adapters 需替换为你实际训练输出目录(训练命令中 --output_dir 指定的目录)下的 checkpoint 路径
swift export \
    --model /mnt/workspace/models/Qwen/Qwen3-VL-2B-Instruct \
    --adapters ./output_qwen_vl_lora/v0-20260428-172313/checkpoint-38 \
    --merge_lora true \
    --output_dir ./qwen_vl_final_package
测试模型
# Smoke-test script: load the merged checkpoint, show it one image together
# with a question, and print the model's answer.
import torch
from PIL import Image
from transformers import AutoModelForImageTextToText, AutoProcessor

# Change both paths to match your own machine.
model_path = r"B:\Pycharm_PROJECT\picture\checkpoint-38-merged"
image_path = r"B:\Pycharm_PROJECT\picture\my_images\train\010_olivinite\010_olivinite_3.jpg"

print("正在加载模型...")
model = AutoModelForImageTextToText.from_pretrained(
    model_path,
    dtype=torch.bfloat16,  # spelled `dtype` on purpose: avoids a warning
    device_map="auto",
    trust_remote_code=True,
)
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
print("模型加载完成!")

# Image.open is lazy and keeps the file open; converting inside the `with`
# block forces the pixels to load, normalises grayscale/RGBA/palette inputs
# to RGB, and lets the file handle close right away.
with Image.open(image_path) as raw_image:
    image = raw_image.convert("RGB")

query = "这张图片是什么岩石?"
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": query},
        ],
    }
]

# Approach 1: render the chat template to plain text first, then let the
# processor build the model inputs from that text plus the image.
text = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
inputs = processor(text=text, images=[image], return_tensors="pt").to(model.device)

outputs = model.generate(
    **inputs, max_new_tokens=512, do_sample=True, temperature=0.1
)

# Slice off the first `prompt_length` positions of the output so that only
# the tokens after the prompt are decoded.
prompt_length = inputs["input_ids"].shape[1]
response = processor.decode(outputs[0][prompt_length:], skip_special_tokens=True)

print(f"用户: {query}")
print(f"模型: {response}")