玩转Kaggle：Classify Leaves（叶子分类）——模型设计与训练

2023年12月21日发(作者：劳斯莱斯银魅)

# Print print(f'FOLD {fold}') print('--------------------------------------') # Sample elements randomly from a given list of ids, no replacement. train_subsampler = RandomSampler(train_ids) valid_subsampler = RandomSampler(valid_ids) # Define data loaders for training and testing data in this fold trainloader = ader(train_dataset, batch_size=64, sampler=train_subsampler, num_workers=4) validloader = ader(train_dataset, batch_size=64, sampler=valid_subsampler, num_workers=4)

# Initialize a model and put it on the device specified. model = resnet_model(176) model = (device) = device

# Initialize optimizer optimizer = (ters(),lr=learning_rate,weight_decay= weight_decay) # optimizer = SWA(our_optimizer, swa_start=5, swa_freq =5, swa_lr=0.05) scheduler = CosineAnnealingLR(optimizer,T_max=10)

# Run the training loop for defined number of epochs for epoch in range(0,num_epochs): () # Print epoch print(f'Starting epoch {epoch+1}') # These are used to record information in training train_losses = [] train_accs = [] # Iterate the training set by batches for batch in tqdm(trainloader): # Move images and labels to GPU imgs, labels = batch imgs = (device) labels = (device) # Forward the data logits = model(imgs) # Calculate loss loss = loss_function(logits,labels) # Clear gradients in previous step _grad() # Compute gradients for parameters rd() # Update the parameters with computed gradients () # Compute the accuracy for current batch. acc = ((dim=-1) == labels).float().mean() # Record the loss and accuracy. train_(()) train_(acc)# print("第%d个epoch的学习率：%f" % (epoch+1,_groups[0]['lr'])) () # The average loss and accuracy of the training set is the average of the recorded values. train_loss = (train_losses) / len(train_losses) train_acc = (train_accs) / len(train_accs) # Print the information. print(f"[ Train | {epoch + 1:03d}/{num_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")# print(f"[ Train | {epoch + 1:03d}/{num_epochs:03d} ] loss = {train_loss:.5f}") # Train process (all epochs) is complete print('Training process has finished. Saving trained model.') print('Starting validation') # Saving the model print('saving model with loss {:.3f}'.format(train_loss)) save_path = f'./model/leaf/resnet-fold-{fold}.pth' (_dict(),save_path)

# Start Validation () valid_losses = [] valid_accs = [] with _grad(): for batch in tqdm(validloader):

1234images/geimages/ges/ges/26num_label9num_label9num_label2num_label3num_label4120102120# transpose the predict result (for calculate the mode of result easier)df_all_transpose = df_().drop(['image'],axis=1).transpose()df_all_()0num_label0num_label1num_label2num_8797535353879253535387947373737346879535348879777787987373737346879945737373735 rows × 8800 columns# vote the best result(mode)df_mode = df_all_().transpose()df_()00123422.026.0120.0102.0120.01NaNNaNNaNNaNNaN2NaNNaNNaNNaNNaN3NaNNaNNaNNaNNaN4NaNNaNNaNNaNNaN# conver the best result back to textvoting_class = []for each in df_mode[0]: voting_(label_inv_map[each])df_all['label'] = voting_classdf_()image01234images/ges/ges/ges/ges/_label21num_label2120num_label22229120112120num_label32226120102121num_label42226120102120labelasimina_trilobabetula_nigraplatanus_acerifoliapinus_bungeanaplatanus_acerifolia# save the best result as csv# choose columns image and lable as the resultdf_submission = df_all[['image','label']].copy()# save the the result filedf__csv('./kaggle_submission/leaf/', index=False)print('Voting results of resnest successfully saved!')Voting results of resnest successfully saved!1.5 kaggle提交

# Print epoch print(f'Starting epoch {epoch+1}') # These are used to record information in training train_losses = [] train_accs = [] # Iterate the training set by batches for batch in tqdm(trainloader): # Move images and labels to GPU imgs, labels = batch imgs = (device) labels = (device) # Forward the data logits = model(imgs) # Calculate loss loss = loss_function(logits,labels) # Clear gradients in previous step _grad() # Compute gradients for parameters rd() # Update the parameters with computed gradients () # Compute the accuracy for current batch. acc = ((dim=-1) == labels).float().mean() # Record the loss and accuracy. train_(()) train_(acc)# print("第%d个epoch的学习率：%f" % (epoch+1,_groups[0]['lr'])) () # The average loss and accuracy of the training set is the average of the recorded values. train_loss = (train_losses) / len(train_losses) train_acc = (train_accs) / len(train_accs) # Print the information. print(f"[ Train | {epoch + 1:03d}/{num_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")# print(f"[ Train | {epoch + 1:03d}/{num_epochs:03d} ] loss = {train_loss:.5f}") # Train process (all epochs) is complete print('Training process has finished. Saving trained model.') print('Starting validation') # Saving the model print('saving model with loss {:.3f}'.format(train_loss)) save_path = f'./model/leaf/resneXt-fold-{fold}.pth' (_dict(),save_path)

# Start Validation () valid_losses = [] valid_accs = [] with _grad(): for batch in tqdm(validloader): imgs, labels = batch # No gradient in validation logits = model((device)) loss = loss_function(logits,(device)) acc = ((dim=-1) == (device)).float().mean() # Record loss and accuracy valid_(())

valid_(acc) # The average loss and accuracy valid_loss = (valid_losses)/len(valid_losses) valid_acc = (valid_accs)/len(valid_accs) print(f"[ Valid | {epoch + 1:03d}/{num_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}") print('Accuracy for fold %d: %d' % (fold, valid_acc)) print('--------------------------------------') results[fold] = valid_acc# Print fold resultsprint(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_folds} FOLDS')print('--------------------------------')total_summation = 0.0for key, value in (): print(f'Fold {key}: {value} ') total_summation += valueprint(f'Average: {total_summation/len(())} ')

----------------------------------------------------------------------------
K-FOLD CROSS VALIDATION RESULTS FOR 5 FOLDS
--------------------------------
Fold 0: 0.78816

Fold 1: 0.79101

Fold 2: 0.857

Fold 3: 0.7923932075500488

Fold 4: 0.7697393894195557

Average: 0.78986

2.3 Resnext模型预测

testloader = ader(test_dataset,batch_size=64, num_workers=4)## predictmodel = resnext_model(176)# create model and load weights from checkpointmodel = (device)# load the all foldsfor test_fold in range(k_folds): model_path = f'./model/leaf/resneXt-fold-{test_fold}.pth' saveFileName = f'./kaggle_submission/leaf/resneXt-submission-fold-{test_fold}.csv' _state_dict((model_path))

# Make sure the model is in eval mode. # Some modules like Dropout or BatchNorm affect if the model is in training mode. () tta_model = ficationTTAWrapper(model, _crop_transform(200,200)) # Test-Time Augmentation # Initialize a list to store the predictions. predictions = [] # Iterate the testing set by batches. for batch in tqdm(testloader): imgs = batch with _grad(): logits = tta_model((device)) # Take the class with greatest logit as prediction and record it. ((dim=-1).cpu().numpy().tolist()) preds = [] for i in predictions: (label_inv_map[i])

test_data = _csv('./data/classify-leaves/') test_data['label'] = (preds) submission = ([test_data['image'], test_data['label']], axis=1) _csv(saveFileName, index=False) print("ResNeSt Model Results Done")

100%|██████████| 138/138 [01:45<00:00, 1.31it/s]
ResNeSt Model Results Done

2.4 K-Fold模型投票获取最优解

# load the result of 5-fold cross validationdf0 = _csv('./kaggle_submission/leaf/')df1 = _csv('./kaggle_submission/leaf/')df2 = _csv('./kaggle_submission/leaf/')df3 = _csv('./kaggle_submission/leaf/')df4 = _csv('./kaggle_submission/leaf/')

#convert the result to a numberlist_num_label0,list_num_label1,list_num_label2,list_num_label3,list_num_label4 = [],[],[],[],[]for i in range(len(df0)): list_num_(label_map[df0['label'][i]]) list_num_(label_map[df1['label'][i]])

list_num_(label_map[df2['label'][i]]) list_num_(label_map[df3['label'][i]]) list_num_(label_map[df4['label'][i]])# concate all the datadf_all = ()df_(['label'],axis=1,inplace=True)df_all['num_label0'] = list_num_label0df_all['num_label1'] = list_num_label1df_all['num_label2'] = list_num_label2df_all['num_label3'] = list_num_label3df_all['num_label4'] = list_num_label4df_()image01234images/ges/ges/ges/ges/_label20num_label2120num_label22226120102120num_label322num_label4220# transpose the predict result (for calculate the mode of result easier)df_all_transpose = df_().drop(['image'],axis=1).transpose()df_all_()0num_label0num_label1num_label2num_879753535387927535353879473737373738795753535387977737373879973737373735 rows × 8800 columns# vote the best result(mode)df_mode = df_all_().transpose()df_()00123422.026.0120.0102.0120.01NaN121.0NaNNaNNaN2NaNNaNNaNNaNNaN3NaNNaNNaNNaNNaN4NaNNaNNaNNaNNaN# conver the best result back to textvoting_class = []for each in df_mode[0]: voting_(label_inv_map[each])df_all['label'] = voting_classdf_()imagenum_label0num_label1num_label2num_label3num_label4label

本文发布于:2024-09-22 14:32:06，感谢您对本站的认可！

本文链接：https://www.17tex.com/fanyi/19909.html

上一篇：classify facts阅读中的例子

下一篇：classify的名词形式

标签：模型、银魅、作者、获取、投票、预测

留言与评论（共有 0 条评论）