1. Dataset Construction

Read the data with the classic CSV approach. When building our own Dataset, we get the image path directly from the CSV, read the image with PIL, and cast the label to int (the label is originally a string, mapped to an index through a dict).
```python
def __getitem__(self, idx):
    img, label = self.images[idx], self.labels[idx]
    tf = transforms.Compose([
        lambda x: Image.open(x).convert('RGB'),
        transforms.Resize((int(self.resize * 1.25), int(self.resize * 1.25))),
        transforms.RandomRotation(15),
        transforms.CenterCrop(self.resize),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    img = tf(img)
    label = torch.tensor(label)
    return img, label
```
Read all the files directly and build the CSV:
```python
def make_csv(self):
    """
    Build the CSV, writing one (image path, label) row per image under the root path.
    """
    file_name = './label.csv'
    type_dir = glob.glob(os.path.join(self.root, '*'))
    with open(file_name, 'w') as csvfile:
        writer = csv.writer(csvfile)
        for type in type_dir:
            person_dir = glob.glob(os.path.join(type, '*'))
            for person in person_dir:
                emotion_dir = glob.glob(os.path.join(person, '*'))
                for emotion in emotion_dir:
                    label = emotion.split('/')[-1]
                    img_dir = glob.glob(os.path.join(emotion, '*.jpeg'))
                    for img in img_dir:
                        writer.writerow([img, label])
                print("writing one person")
    print("process ok")
```
Next, split the dataset into train : val : test = 6 : 2 : 2:
```python
def __init__(self, path, resize, mode):
    super(NIREmotionDatasets, self).__init__()
    self.class_dir = {"Anger": 0, "Disgust": 1, "Fear": 2,
                      "Happiness": 3, "Sadness": 4, "Surprise": 5}
    self.root = path
    self.resize = resize
    self.images, self.labels = self.load_csv('./label.csv')
    if mode == "train":
        self.images = self.images[:int(0.6 * len(self.images))]
        self.labels = self.labels[:int(0.6 * len(self.labels))]
    elif mode == 'val':
        self.images = self.images[int(0.6 * len(self.images)):int(0.8 * len(self.images))]
        self.labels = self.labels[int(0.6 * len(self.labels)):int(0.8 * len(self.labels))]
    elif mode == 'test':
        # the remaining 20% is the test split
        self.images = self.images[int(0.8 * len(self.images)):]
        self.labels = self.labels[int(0.8 * len(self.labels)):]
```
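The load_csv called here is never shown in the post. A minimal sketch of what it might look like, assuming it reads back the (image path, label name) rows that make_csv wrote and maps the label name to an int through self.class_dir:

```python
def load_csv(self, filename):
    # hypothetical reconstruction: read (img_path, label_name) rows
    # and map the label string to an int via self.class_dir
    images, labels = [], []
    with open(filename) as csvfile:
        for row in csv.reader(csvfile):
            if not row:  # csv.writer may emit blank lines on some platforms
                continue
            img, label = row
            images.append(img)
            labels.append(self.class_dir[label])
    return images, labels
```

Note that make_csv writes rows grouped by folder, so for the 6:2:2 slicing above to give representative splits, the rows should be shuffled once (with a fixed seed) before slicing.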
2. Training Template

Since ResNet is available directly in the timm library, we simply call it.
Steps to build the code:

Build the argparse (it can largely be copied and pasted); a sketch is given after the training code below.
Build the DataLoader: pass in our Dataset, enable shuffle, and set the batch size.
Set up the criterion, the optimizer (e.g. Adam; the code below uses SGD with momentum), and the model; the model is instantiated directly, then the previous .pth is loaded with load_state_dict.
Replace the model's last layer with a classification head sized to the number of classes, then move the model to the device.
After that comes the for loop over epochs:
First run validation, which computes the accuracy (the standard accuracy calculation).
Then train: the model must be set to train mode; this part computes the loss.
The usual routine: zero_grad, compute pred, compute the loss between pred and y, backpropagate the loss, and step the optimizer.
After each epoch, step the learning-rate scheduler.
```python
best_acc = 0
print(args.input_size)
datasets_train = build_datasets('train', args)
datasets_val = build_datasets('test', args)
emotion_idx = datasets_train.class_dir
train_loader = torch.utils.data.DataLoader(datasets_train,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=args.num_workers)
val_loader = torch.utils.data.DataLoader(datasets_val,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=args.num_workers)

model_resnet = resnet18(pretrained=True)
checkpoint = torch.load('/home/tonnn/.nas/weijia/work/fer/baseline/resnet_base/resnet18_msceleb.pth')
model_resnet.load_state_dict(checkpoint['state_dict'], strict=True)
model = nn.Sequential(*list(model_resnet.children())[:-1],
                      Flatten(),
                      nn.Linear(512, 6)
                      ).to(device)

# count samples, not batches, so the message below is accurate
val_num = len(val_loader.dataset)
train_num = len(train_loader.dataset)
print("using {} images for training, {} images for validation.".format(train_num, val_num))

params = list(model.parameters())
optimizer = torch.optim.SGD(params, lr=args.lr, weight_decay=1e-4, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
criteon = nn.CrossEntropyLoss()

for epoch in range(args.epochs):
    print(f'Epoch {epoch}')
    print(f'length of dataloader_train is {len(train_loader)}')
    acc = 0.0
    if epoch % 1 == 0:
        print("Evaluating...")
        model.eval()
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                logits = model(x)
                pred = logits.argmax(dim=1)
                acc += torch.eq(pred, y).sum().item()
        # divide by the number of samples, not the number of batches
        val_acc = acc / len(val_loader.dataset)
        print('[epoch %d] val_accuracy: %.3f' % (epoch + 1, val_acc))
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), 'accuracy_loss_change_train_50.mdl')
    model.train()
    for step, (x, y) in enumerate(train_loader):
        x, y = x.to(device), y.to(device)
        logits = model(x)
        loss = criteon(logits, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    scheduler.step()
```
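The argparse from the first step is never shown. A minimal sketch of a get_args_parser (the name appears in the test script later) covering just the arguments the snippets reference; the default values here are placeholders, not the author's:

```python
import argparse

def get_args_parser():
    parser = argparse.ArgumentParser(description='FER baseline training')
    # defaults below are illustrative placeholders
    parser.add_argument('--input_size', type=int, default=224)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--start_epoch', type=int, default=0)
    return parser

args = get_args_parser().parse_args()
```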
3. Prediction (test) and Resuming Training from a .pth

For the predict step we create a new file. The paths of the images to predict are passed in, the model is built and its weights are loaded, inference runs under no_grad, and finally an id-to-class dictionary maps the predicted indices back to class names.
Use the Dataset to read the images and feed them into a DataLoader.
Set the environment variable (CUDA_VISIBLE_DEVICES) to enable multi-GPU inference.
Build the model and load the weights with the model's load_state_dict function.
Finally, run inference inside with torch.no_grad(); the data must also be moved to CUDA.
Then take the index of the largest logit and recover the class name by reverse lookup.
```python
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

test_datasets = build_datasets("test", get_args_parser().parse_args())
test_loader = torch.utils.data.DataLoader(
    test_datasets,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=args.num_workers)
class_idx = test_datasets.class_dir

pth_path = 'accuracy_loss_change_train_50.mdl'
model = resnet18()
model = nn.Sequential(*list(model.children())[:-1],
                      Flatten(),
                      nn.Linear(512, 6)
                      ).to(device)
model.load_state_dict(torch.load(pth_path))
print('loaded from ckpt!')
model = nn.DataParallel(model)
model = model.cuda()

model.eval()
acc = 0
ans = []
with torch.no_grad():
    for x, y in test_loader:
        x, y = x.cuda(), y.cuda()
        logits = model(x)
        pred = logits.argmax(dim=1)
        for x1 in pred:
            ans.append(find(class_idx, int(x1)))
        acc += torch.eq(pred, y).sum().float().item()
```
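The find helper used above (the reverse lookup of a class name from its index) is not defined anywhere in the post; a minimal sketch of what it presumably does, assuming class indices are unique:

```python
def find(class_idx, value):
    """Reverse lookup: return the class name whose index equals value."""
    for name, idx in class_idx.items():
        if idx == value:
            return name
    return None
```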
Resuming training works like the predict script above: check whether args.start_epoch is 0; if it is not, load the weights.
```python
if args.start_epoch != 0:
    # load_state_dict must be called, not assigned to
    model.load_state_dict(torch.load('accuracy_loss_change_train_1003.mdl'))
```
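Loading only the model weights restarts the optimizer and LR schedule from scratch. For a true resume, a common pattern (an assumption here, not what the post does) is to save and restore the optimizer, scheduler, and epoch together:

```python
# saving: hypothetical extension of the post's checkpointing
torch.save({'epoch': epoch,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict()}, 'checkpoint.pth')

# resuming
if args.start_epoch != 0:
    ckpt = torch.load('checkpoint.pth')
    model.load_state_dict(ckpt['model'])
    optimizer.load_state_dict(ckpt['optimizer'])
    scheduler.load_state_dict(ckpt['scheduler'])
    args.start_epoch = ckpt['epoch'] + 1
```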
4. Running PyTorch on Multiple GPUs, Loading and Saving Weights

Multi-GPU execution already appeared in the code above:
First set the os environment variable CUDA_VISIBLE_DEVICES to 0,1.
Then wrap the model in nn.DataParallel.
Then move the model to CUDA.
Then move the data to CUDA during both training and prediction.
Otherwise nothing changes; things just need to be moved with .cuda() instead of the usual .to(device).
```python
criteon = nn.CrossEntropyLoss()
model = nn.DataParallel(model)
model = model.cuda()

for epoch in range(0, args.epochs):
    print(f'Epoch {epoch}')
    print(f'length of dataloader_train is {len(train_loader.dataset)}')
    acc = 0.0
    if epoch % 1 == 0:
        print("Evaluating...")
        model.eval()
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.cuda(), y.cuda()
                logits = model(x)
                pred = logits.argmax(dim=1)
                acc += torch.eq(pred, y).sum().item()
        val_acc = acc / len(val_loader.dataset)
        print('[epoch %d] val_accuracy: %.3f' % (epoch + 1, val_acc))
        if val_acc > best_acc:
            best_acc = val_acc
            file_name = '/home/tonnn/.nas/weijia/work/fer/baseline/resnet_base/checkpoint/accuracy_loss_change_train_' + str(epoch) + '.pth'
            torch.save(model.state_dict(), file_name)
    model.train()
    for step, (x, y) in enumerate(train_loader):
        x, y = x.cuda(), y.cuda()
        logits = model(x)
        loss = criteon(logits, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(r"train epoch[{}/{}] loss:{:.3f}".format(epoch + 1, args.epochs, loss.item()))
    scheduler.step()
```
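One caveat the post does not mention: nn.DataParallel prefixes every key of model.state_dict() with "module.", so a checkpoint saved from the wrapped model (as above) will not load into an unwrapped model (as the test script does) with strict key matching. A common fix, assuming the checkpoint was saved after wrapping:

```python
# strip the 'module.' prefix that nn.DataParallel adds to every key,
# so the checkpoint loads into an unwrapped model
state_dict = torch.load(pth_path)
state_dict = {k[len('module.'):] if k.startswith('module.') else k: v
              for k, v in state_dict.items()}
model.load_state_dict(state_dict)
```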
Appendix: Cropping Faces with MTCNN
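The body of this appendix is missing from the post. A minimal face-cropping sketch using the facenet-pytorch implementation of MTCNN (an assumption about which MTCNN package is meant; image_size and margin are illustrative values):

```python
from PIL import Image
from facenet_pytorch import MTCNN  # pip install facenet-pytorch

# illustrative crop size and margin, not values from the post
mtcnn = MTCNN(image_size=224, margin=20)

img = Image.open('person.jpeg').convert('RGB')
# detects the face, crops it, and saves the crop to disk
face = mtcnn(img, save_path='person_cropped.jpeg')
```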