当前位置: 首页 > news >正文

回归实战2

# COVID-19 regression practice script.
#
# Trains a one-hidden-layer MLP on the first 93 feature columns of a CSV file
# to predict its last column, keeps the checkpoint with the lowest validation
# loss, and writes test-set predictions to a CSV file.
import csv  # CSV reading / writing
import os
import time

import numpy as np  # matrix handling
import pandas  # CSV handling (unused below; kept from the original import list)
import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader, Dataset


class CovidDataset(Dataset):
    """Dataset backed by a CSV file whose first row and first column are dropped.

    Args:
        file_path: Path of the CSV file to read.
        mode: ``"train"``, ``"val"`` or ``"test"``. ``"train"`` keeps the rows
            whose index is not a multiple of 5, ``"val"`` keeps the rows whose
            index is a multiple of 5, ``"test"`` keeps every row.

    Raises:
        ValueError: If ``mode`` is not one of the three supported values.

    In ``"train"``/``"val"`` mode items are ``(x, y)`` pairs where ``y`` comes
    from the last CSV column; in ``"test"`` mode items are ``x`` only.
    """

    # Number of leading columns (after the dropped first column) used as inputs.
    num_features = 93

    def __init__(self, file_path, mode):
        if mode not in ("train", "val", "test"):
            raise ValueError(
                "mode must be 'train', 'val' or 'test', got %r" % (mode,)
            )

        # newline="" is what the csv module recommends for files it parses.
        with open(file_path, "r", newline="") as f:
            ori_data = list(csv.reader(f))
        # Drop the header row and the first column, then convert to numbers.
        csv_data = np.array(ori_data)[1:, 1:].astype(float)

        # Every fifth row goes to validation (a simple, not recommended, split).
        n_rows = len(csv_data)
        if mode == "train":
            indices = [i for i in range(n_rows) if i % 5 != 0]
        elif mode == "val":
            indices = [i for i in range(n_rows) if i % 5 == 0]
        else:
            indices = list(range(n_rows))

        X = torch.tensor(csv_data[indices, : self.num_features])
        if mode != "test":
            self.Y = torch.tensor(csv_data[indices, -1])

        # Column-wise standardization: (x - mean) / std, because the columns
        # have different scales.
        # NOTE(review): each split is standardized with its own statistics, as
        # in the original code; reusing the training statistics for val/test
        # may be preferable -- confirm before changing.
        mean = X.mean(dim=0, keepdim=True)
        std = X.std(dim=0, keepdim=True)
        # A constant column has std == 0 and would turn into NaN (0 / 0);
        # dividing by 1 instead leaves such a column at exactly 0.
        std = torch.where(std == 0, torch.ones_like(std), std)
        self.X = (X - mean) / std
        self.mode = mode

    def __getitem__(self, item):
        """Return the sample at index ``item`` as float32 tensor(s)."""
        if self.mode == "test":
            # The test split has no target column.
            return self.X[item].float()
        return self.X[item].float(), self.Y[item].float()

    def __len__(self):
        return len(self.X)


class myModel(nn.Module):
    """Two-layer fully connected regressor: ``inDim -> 128 -> 1`` with ReLU."""

    def __init__(self, inDim):
        super().__init__()
        self.fc1 = nn.Linear(inDim, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        """Run ``x`` through the network and drop the trailing size-1 axis."""
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        if x.dim() > 1:
            # Remove the second dimension so predictions line up with y.
            x = x.squeeze(1)
        return x


def mseLoss(pred, target, model):
    """Return mean squared error plus an L2 penalty on all model parameters.

    Args:
        pred: Predicted values.
        target: Ground-truth values, same shape as ``pred``.
        model: Module whose parameters are penalized.

    Returns:
        ``MSE(pred, target) + 0.00075 * sum(param ** 2)``.
    """
    criterion = nn.MSELoss(reduction="mean")
    # L2 regularization term (an L1 variant would sum abs(param) instead).
    regularization_loss = sum(
        torch.sum(param ** 2) for param in model.parameters()
    )
    return criterion(pred, target) + 0.00075 * regularization_loss


loss = mseLoss  # default loss function used by train_val


def _plot_losses(train_losses, val_losses):
    """Plot the per-epoch training and validation loss curves."""
    # Imported lazily so the dataset/model/evaluate code can be imported on
    # machines without matplotlib or a display backend.
    import matplotlib.pyplot as plt

    plt.plot(train_losses)
    plt.plot(val_losses)
    plt.title("loss")
    plt.legend(["train", "val"])
    plt.show()


def train_val(model, train_loader, val_loader, lr, optimizer, device, epochs,
              save_path, loss_fn=None):
    """Train ``model``, validate after every epoch and save the best checkpoint.

    Args:
        model: Network to train.
        train_loader: Loader yielding ``(x, y)`` training batches.
        val_loader: Loader yielding ``(x, y)`` validation batches.
        lr: Unused here; the learning rate is whatever ``optimizer`` was
            built with. Kept for call compatibility.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the model and batches are moved to.
        epochs: Number of passes over ``train_loader``.
        save_path: File the whole model object is saved to whenever the summed
            validation loss improves. Missing parent directories are created.
        loss_fn: Callable ``(pred, target, model) -> loss``. Defaults to the
            module-level ``loss``.
    """
    if loss_fn is None:
        loss_fn = loss

    model = model.to(device)  # make sure the model is on the target device
    plt_train_loss = []  # mean training loss per epoch
    plt_val_loss = []  # mean validation loss per epoch
    min_val_loss = float("inf")

    # torch.save does not create missing directories.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    for epoch in range(epochs):
        model.train()
        start_time = time.time()
        train_loss = 0.0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            y_pred = model(x)
            bat_loss = loss_fn(y_pred, y, model)
            bat_loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            train_loss += bat_loss.cpu().item()
        plt_train_loss.append(train_loss / len(train_loader))

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for val_x, val_y in val_loader:
                val_x, val_y = val_x.to(device), val_y.to(device)
                val_pred_y = model(val_x)
                val_bat_loss = loss_fn(val_pred_y, val_y, model)
                val_loss += val_bat_loss.cpu().item()
        plt_val_loss.append(val_loss / len(val_loader))

        # Keep the checkpoint with the lowest summed validation loss.
        if val_loss < min_val_loss:
            min_val_loss = val_loss
            torch.save(model, save_path)

        print("[%03d/%03d] %2.2f sec(s) train_loss: %.6f val_loss:%.6f" %
              (epoch, epochs, time.time() - start_time,
               plt_train_loss[-1], plt_val_loss[-1]))

    _plot_losses(plt_train_loss, plt_val_loss)


def evaluate(model_path, test_loader, rel_path, device):
    """Load a saved model, predict every test batch and write a CSV file.

    Args:
        model_path: File produced by ``torch.save(model, ...)``.
        test_loader: Loader yielding input batches only (any batch size).
        rel_path: Output CSV path; columns are ``id`` and ``tested_positive``.
        device: Device used for inference.
    """
    # SECURITY: loading a whole pickled model executes arbitrary pickle code,
    # so only load checkpoints you created yourself. weights_only=False is
    # required for full-model checkpoints on PyTorch versions where the
    # default is True; map_location lets a checkpoint saved on GPU load on a
    # CPU-only machine.
    model = torch.load(
        model_path, map_location=device, weights_only=False
    ).to(device)
    rel = []  # predictions, in loader order
    model.eval()
    with torch.no_grad():
        for x in test_loader:
            x = x.to(device)
            pred = model(x)
            # Flatten so both single-sample and multi-sample batches work.
            rel.extend(pred.cpu().reshape(-1).tolist())

    with open(rel_path, "w", newline="") as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(["id", "tested_positive"])
        for i, pred in enumerate(rel):
            csv_writer.writerow([str(i), str(pred)])
    print("结果保存到了"+rel_path)


train_file = r"D:\共享文件夹\李哥考研\课程\人工智能课程\beike代码\covid\covid.train.csv"
test_file = r"D:\共享文件夹\李哥考研\课程\人工智能课程\beike代码\covid\covid.test.csv"

batch_size = 16
epochs = 20  # number of training epochs
lr = 0.001  # learning rate
data_dim = 93  # input feature dimension
save_path = "model_save/best_model.pth"
rel_path = "pred.csv"
device = "cuda" if torch.cuda.is_available() else "cpu"


def main():
    """Build the datasets, train the model and write the test predictions."""
    print(device)

    train_set = CovidDataset(train_file, "train")
    val_set = CovidDataset(train_file, "val")
    test_set = CovidDataset(test_file, "test")

    # Training batches are drawn in random order.
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    # Validation is not shuffled so the per-epoch validation loss is
    # deterministic for a given model state.
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

    model = myModel(data_dim).to(device)
    # SGD with momentum.
    optimizer = optim.SGD(params=model.parameters(), lr=lr, momentum=0.9)

    train_val(model, train_loader, val_loader, lr, optimizer, device, epochs,
              save_path)
    # Produce the submission file from the best checkpoint.
    evaluate(save_path, test_loader, rel_path, device)


if __name__ == "__main__":
    main()
http://www.jsqmd.com/news/491512/

相关文章:

  • 一次试样失败催生的技术革新:福尔蒂吹瓶专用ACR助剂逆向推演与流变拟合
  • 半监督食物图像分类项目
  • 国内首个,面向中小企业数据资产估值体系:“荟宸信科面向中小企业数据资产估值体系”正式发布(一)
  • iPhone开发 - %1$、%2$的写法
  • 就让我们从react的渲染逻辑出发吧
  • WordPress报错:preg_match() Compilation failed 错误解决方法
  • 【跨端技术ReactNative】JavaScript学习
  • 长亭 Xray Web 漏洞扫描器
  • 行业大咖谈数据资产|中海油如何规划数据资产管理?央企硬核实践拆解
  • 湘潭品牌设计公司权威推荐榜单
  • 零/负电价来了!储能业主如何抓住机遇?
  • 中小企业可用福尔蒂轻量化改性套件:含17种PA6/PBT配比+免费云端模拟
  • es为什么快面试回答
  • 筋膜提升第几天最肿
  • 深入解析HDFS:定义、架构、原理、应用场景及常用命令
  • 5 分钟搭建 Deepseek 私有化 RAG 知识库!支持多模型切换 + 激活验证 + 增量索引
  • 高级技巧-让AI自我迭代
  • 香港Web3区块链安全公司排行榜前三都有哪些公司?
  • openclaw、workbuddy上必装的12个RAG 应用 Skill 技能
  • 带你轻松了解半导体CIM系统之AMHS (二)
  • Android Studio 安装保姆级教程(mac版)
  • 巴菲特的持股策略:为什么长期持有是关键
  • 2026选不停机换单印刷机源头厂家,看这几点错不了,质量好的不停机换单印刷机技术实力与市场典范解析 - 品牌推荐师
  • 团队最佳实践
  • 改性塑料行业‘iOS生态’初现:福尔蒂开放6大基础配方API供下游二次开发
  • Redis性能提升3倍的5个冷门技巧,90%开发者都不知道!
  • 提示工程容灾备份策略中的网络依赖:架构师教你解决带宽+延迟问题
  • SQL Server 学习笔记:从 MySQL 到 SQL Server
  • 如何用NFC标签直接打开应用的某个功能?技术深度解析与商业落地
  • 开关接触不良?数据说了算!回路电阻测试仪应用与选购实战 - 品牌推荐大师