Training a model on CPU with multiple processes and multiple cores in PyTorch -- reposted from "呆萌的代Ma"

Imports
import torch.multiprocessing as mp
import torch
import torch.nn as nn
import torch.utils.data as Data
Here torch.multiprocessing is used to create the worker processes; its API closely mirrors Python's built-in multiprocessing module.
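As a minimal sketch (not from the original post; the fill worker and the 4-element tensor are made up for illustration), torch.multiprocessing can be used as a near drop-in replacement for the standard multiprocessing module, with the bonus that tensors placed in shared memory are visible to every worker:

import torch
import torch.multiprocessing as mp

def fill(rank, shared):
    # each worker writes its rank into the tensor it received
    shared[rank] = rank

if __name__ == '__main__':
    shared = torch.zeros(4)
    shared.share_memory_()  # place the tensor in shared memory, like model.share_memory() later
    workers = [mp.Process(target=fill, args=(r, shared)) for r in range(4)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print(shared)  # tensor([0., 1., 2., 3.])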
Prepare the training data (same as single-process)
Random numbers are used to generate the training data here, so there is no dataset to download.
def get_train_data():
    """Build the training data; random numbers are used here, so the final result will not be good."""

    def get_tensor_from_pd(dataframe_series) -> torch.Tensor:
        return torch.tensor(data=dataframe_series.values)

    import numpy as np
    import pandas as pd
    from sklearn import preprocessing
    # Generate x, min-max normalize it, wrap it in a DataFrame, then convert it to a tensor
    df = pd.DataFrame(data=preprocessing.MinMaxScaler().fit_transform(np.random.randint(0, 10, size=(2000, 300))))
    y = pd.Series(list(range(2000)))
    return get_tensor_from_pd(df).float(), get_tensor_from_pd(y).float()
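A quick shape check (added here, not in the original post) shows what the function returns:

x, y = get_train_data()
print(x.shape, y.shape)  # torch.Size([2000, 300]) torch.Size([2000])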
Build the model (same as single-process)
An LSTM autoencoder is used here; you can swap in any model you want to train, since the multiprocessing setup does not depend on the model.
class LstmFcAutoEncoder(nn.Module):
    def __init__(self, input_layer=300, hidden_layer=100, batch_size=20):
        super(LstmFcAutoEncoder, self).__init__()
        self.input_layer = input_layer
        self.hidden_layer = hidden_layer
        self.batch_size = batch_size
        self.encoder_lstm = nn.LSTM(self.input_layer, self.hidden_layer, batch_first=True)
        self.encoder_fc = nn.Linear(self.hidden_layer, self.hidden_layer)
        self.decoder_lstm = nn.LSTM(self.hidden_layer, self.input_layer, batch_first=True)
        self.decoder_fc = nn.Linear(self.hidden_layer, self.hidden_layer)
        self.relu = nn.ReLU()

    def forward(self, input_x):
        input_x = input_x.view(len(input_x), 1, -1)
        # encoder
        encoder_lstm, (n, c) = self.encoder_lstm(input_x,
                                                 # hidden/cell state shape: (n_layers, batch, hidden_size)
                                                 (torch.zeros(1, self.batch_size, self.hidden_layer),
                                                  torch.zeros(1, self.batch_size, self.hidden_layer)))
        encoder_fc = self.encoder_fc(encoder_lstm)
        encoder_out = self.relu(encoder_fc)
        # decoder
        decoder_fc = self.relu(self.decoder_fc(encoder_out))
        decoder_lstm, (n, c) = self.decoder_lstm(decoder_fc,
                                                 (torch.zeros(1, 20, self.input_layer),
                                                  torch.zeros(1, 20, self.input_layer)))
        return decoder_lstm.squeeze()
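A minimal sanity check (added here, not in the original post): with the default batch_size=20, a batch of 20 vectors of length 300 is reconstructed to the same shape:

model = LstmFcAutoEncoder()
out = model(torch.rand(20, 300))
print(out.shape)  # torch.Size([20, 300])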
Write the training code (same as single-process)
Because the processes are isolated from each other, only the model parameters are shared; everything else is private to each process: the loss function, the optimizer and the epoch counters are not shared (a variant that makes this explicit is sketched after the function below).
def train(model, data_loader, loss_function, optimizer, epochs):
    for i in range(epochs):
        for seq, labels in data_loader:
            optimizer.zero_grad()
            y_pred = model(seq).squeeze()  # squeeze the output to drop dimensions of size 1
            single_loss = loss_function(y_pred, seq)
            single_loss.backward()
            optimizer.step()
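Since only the model is shared, a possible variant (a sketch, not from the original post; train_own_optimizer is a made-up name) is to let each worker build its own loss function and optimizer inside the training function, as in the classic Hogwild example, instead of passing them in from the parent process:

def train_own_optimizer(model, data_loader, epochs, lr=0.001):
    # each process constructs its own loss function and optimizer;
    # only the parameters inside `model` live in shared memory
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    for i in range(epochs):
        for seq, labels in data_loader:
            optimizer.zero_grad()
            y_pred = model(seq).squeeze()
            single_loss = loss_function(y_pred, seq)
            single_loss.backward()
            optimizer.step()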
Start training (this is where multiprocessing comes in!)
if __name__ == '__main__':
    model = LstmFcAutoEncoder()
    x, y = get_train_data()
Here the data is wrapped in a DataLoader; of course you can skip this step as well.
    train_loader = Data.DataLoader(
        dataset=Data.TensorDataset(x, y),  # data wrapped in Data.TensorDataset() can have any number of dimensions
        batch_size=20,   # size of each mini-batch
        shuffle=True,    # whether to shuffle the data (shuffling is better)
        num_workers=3,   # number of worker processes for loading the data
    )
The usual training trio:
    loss_function = nn.MSELoss()  # loss
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # optimizer
    epochs = 150
Now the multiprocessing setup:
    # multiprocessing starts here
    num_processes = 4  # use 4 processes
    # NOTE: this is required for the ``fork`` method to work
    model.share_memory()
    processes = []
    for rank in range(num_processes):
        # 4 processes, each running 150 epochs, so in effect 4 * 150 = 600 epochs are run!
        p = mp.Process(target=train, args=(model, train_loader, loss_function, optimizer, epochs))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
If you want to squeeze every last bit out of the CPU, change it to:
from multiprocessing import cpu_count
num_processes = cpu_count()
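One caveat not in the original post: each PyTorch process also runs its own intra-op thread pool, so with one process per core it can help to cap each worker at a single thread via torch.set_num_threads. A minimal sketch (the wrapper name train_single_thread is made up for illustration):

def train_single_thread(model, data_loader, loss_function, optimizer, epochs):
    # limit intra-op parallelism inside this worker so that
    # num_processes workers do not oversubscribe the CPU cores
    torch.set_num_threads(1)
    train(model, data_loader, loss_function, optimizer, epochs)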
Verifying that the parameters are shared across processes
1. Open the system monitor and check that several CPU cores are busy (of course they are).
2. Verify the parameters: set a breakpoint while debugging (the optimizer.step() line is a good spot), let one process run a few iterations, then inspect the model parameters in each of the other processes: print(model.encoder_fc.state_dict())  # look at the parameters of the model's encoder_fc layer
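As one programmatic way to check this (a sketch added here, not from the original post; inspect_shared_params is a made-up helper), each worker can print its process id, the storage address of a shared parameter and a few of its values; identical storage addresses across processes indicate that they are all updating the same tensor:

import os

def inspect_shared_params(model):
    # print the pid, the underlying storage address and the first few weights;
    # all workers should report the same data_ptr for a shared model
    w = model.encoder_fc.weight
    print(os.getpid(), w.data_ptr(), w.detach().flatten()[:3])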
The complete code
import torch.multiprocessing as mp
import torch
import torch.nn as nn
import torch.utils.data as Data


def get_train_data():
    """Build the training data; random numbers are used here, so the final result will not be good."""

    def get_tensor_from_pd(dataframe_series) -> torch.Tensor:
        return torch.tensor(data=dataframe_series.values)

    import numpy as np
    import pandas as pd
    from sklearn import preprocessing
    # Generate x, min-max normalize it, wrap it in a DataFrame, then convert it to a tensor
    df = pd.DataFrame(data=preprocessing.MinMaxScaler().fit_transform(np.random.randint(0, 10, size=(2000, 300))))
    y = pd.Series(list(range(2000)))
    return get_tensor_from_pd(df).float(), get_tensor_from_pd(y).float()


class LstmFcAutoEncoder(nn.Module):
    def __init__(self, input_layer=300, hidden_layer=100, batch_size=20):
        super(LstmFcAutoEncoder, self).__init__()
        self.input_layer = input_layer
        self.hidden_layer = hidden_layer
        self.batch_size = batch_size
        self.encoder_lstm = nn.LSTM(self.input_layer, self.hidden_layer, batch_first=True)
        self.encoder_fc = nn.Linear(self.hidden_layer, self.hidden_layer)
        self.decoder_lstm = nn.LSTM(self.hidden_layer, self.input_layer, batch_first=True)
        self.decoder_fc = nn.Linear(self.hidden_layer, self.hidden_layer)
        self.relu = nn.ReLU()

    def forward(self, input_x):
        input_x = input_x.view(len(input_x), 1, -1)
        # encoder
        encoder_lstm, (n, c) = self.encoder_lstm(input_x,
                                                 # hidden/cell state shape: (n_layers, batch, hidden_size)
                                                 (torch.zeros(1, self.batch_size, self.hidden_layer),
                                                  torch.zeros(1, self.batch_size, self.hidden_layer)))
        encoder_fc = self.encoder_fc(encoder_lstm)
        encoder_out = self.relu(encoder_fc)
        # decoder
        decoder_fc = self.relu(self.decoder_fc(encoder_out))
        decoder_lstm, (n, c) = self.decoder_lstm(decoder_fc,
                                                 (torch.zeros(1, 20, self.input_layer),
                                                  torch.zeros(1, 20, self.input_layer)))
        return decoder_lstm.squeeze()


def train(model, data_loader, loss_function, optimizer, epochs):
    for i in range(epochs):
        for seq, labels in data_loader:
            optimizer.zero_grad()
            y_pred = model(seq).squeeze()  # squeeze the output to drop dimensions of size 1
            single_loss = loss_function(y_pred, seq)
            single_loss.backward()
            optimizer.step()


if __name__ == '__main__':
    model = LstmFcAutoEncoder()
    x, y = get_train_data()
    train_loader = Data.DataLoader(
        dataset=Data.TensorDataset(x, y),  # data wrapped in Data.TensorDataset() can have any number of dimensions
        batch_size=20,   # size of each mini-batch
        shuffle=True,    # whether to shuffle the data (shuffling is better)
        num_workers=3,   # number of worker processes for loading the data
    )
    # The usual training trio
    loss_function = nn.MSELoss()  # loss
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # optimizer
    epochs = 150
    # multiprocessing starts here
    # from multiprocessing import cpu_count
    # num_processes = cpu_count()
    num_processes = 4  # use 4 processes
    # NOTE: this is required for the ``fork`` method to work
    model.share_memory()
    processes = []
    for rank in range(num_processes):
        # 4 processes, each running 150 epochs, so in effect 4 * 150 = 600 epochs are run!
        p = mp.Process(target=train, args=(model, train_loader, loss_function, optimizer, epochs))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
