神经网络初探

克里斯蒂亚诺诺 2024-01-12 13:56:19  56757 赞同 0 反对 0

分类：资源　标签：运维

神经网络初探：这是一个人脸识别神经网络的训练部分。

# -*- coding: utf-8 -*-"""Created on Fri Mar 19 13:19:54 2021
@author: 12069"""
'''零、 函数总体说明
一、数据导入
1. torch.utils.data.Dataset基类 or torchvision.datasets.ImageFolder 将路径和标签变列表
2. torch.utils.data.DataLoader对图像和标签列表分别封装成一个Tensor
3. 将Tensor数据类型封装成Variable数据类型。
补充：Sample
二、导入你的模型
三、定义损失函数criterion = nn.CrossEntropyLoss()
四、定义优化函数optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
五、定义学习率的变化策略scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
六、开始训练
1. 更新下学习率 scheduler.step()【制定了学习率的变化策略的原因】
2. 设置模型状态为训练状态 model.train(True)
3. 所有梯度置0 model.zero_grad()
4. 网络的前向传播model(inputs)
5. 得到损失criterion(outputs, labels)
6. torch.max预测该样本属于哪个类别的信息
7. optimizer.zero_grad()
8. loss.backward()回传损失，过程中会计算梯度
9. 根据这些梯度更新参数 optimizer.step()
0. 判断你是否有gpu可以用use_gpu = torch.cuda.is_available()'''#coding:utf8from __future__ import print_function, division
import torchimport torch.nn as nnimport torch.optim as optimfrom torch.optim import lr_schedulerfrom torch.autograd import Variableimport torchvisionfrom torchvision import datasets, models, transformsimport timeimport osfrom tensorboardX import SummaryWriterimport torch.nn.functional as Fimport numpy as np
import warnings
warnings.filterwarnings('ignore')
writer = SummaryWriter()
import torch.nn as nn
import torch.nn.functional as F


class simpleconv3(nn.Module):
    """Small CNN: three conv + batch-norm stages and a three-layer classifier.

    Built from nn.Conv2d (convolution), nn.BatchNorm2d (batch normalisation)
    and nn.Linear (fully connected) layers. The flatten step in ``forward``
    hard-codes 48 * 5 * 5 features, which is what three unpadded 3x3,
    stride-2 convolutions produce for a 48x48 input (48 -> 23 -> 11 -> 5).
    The final layer emits 4 raw class scores (no softmax).
    """

    def __init__(self):
        super(simpleconv3, self).__init__()
        # Attribute names and creation order are kept stable so that saved
        # state_dict keys stay compatible.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12,
                               kernel_size=3, stride=2)
        self.bn1 = nn.BatchNorm2d(num_features=12)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=24,
                               kernel_size=3, stride=2)
        self.bn2 = nn.BatchNorm2d(num_features=24)
        self.conv3 = nn.Conv2d(in_channels=24, out_channels=48,
                               kernel_size=3, stride=2)
        self.bn3 = nn.BatchNorm2d(num_features=48)
        self.fc1 = nn.Linear(in_features=48 * 5 * 5, out_features=1200)
        self.fc2 = nn.Linear(in_features=1200, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=4)

    def forward(self, x):
        """Return the 4 class scores for a batch of images."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, bn in stages:
            x = F.relu(bn(conv(x)))
        flat = x.view(-1, 48 * 5 * 5)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
def train_model(model, criterion, optimizer, scheduler, num_epochs=25,
                loaders=None, sizes=None, on_gpu=None, summary=None):
    """Train ``model`` and evaluate it on the validation split every epoch.

    Args:
        model: network to optimise; the same object is returned.
        criterion: loss called as ``criterion(outputs, labels)``. It is
            assumed to return the batch-mean loss (the default reduction of
            the nn.CrossEntropyLoss used by this script), so each batch loss
            is weighted by its batch size before averaging over the epoch.
        optimizer: optimizer updating ``model``'s parameters.
        scheduler: learning-rate scheduler, stepped once per epoch after the
            training phase.
        num_epochs: number of passes over the data.
        loaders: mapping with 'train' and 'val' iterables that yield
            ``(inputs, labels)`` batches. Defaults to the module-level
            ``dataloders``.
        sizes: mapping with the number of samples in 'train' and 'val'.
            Defaults to the module-level ``dataset_sizes``.
        on_gpu: move every batch to CUDA when true. Defaults to the
            module-level ``use_gpu``.
        summary: object providing ``add_scalar``, ``export_scalars_to_json``
            and ``close``. Defaults to the module-level ``writer``.

    Returns:
        The trained ``model``.
    """
    # Fall back to the script-level globals so existing five-argument calls
    # keep working unchanged.
    loaders = dataloders if loaders is None else loaders
    sizes = dataset_sizes if sizes is None else sizes
    on_gpu = use_gpu if on_gpu is None else on_gpu
    summary = writer if summary is None else summary

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # training mode vs. evaluation mode

            running_loss = 0.0
            running_corrects = 0
            for inputs, labels in loaders[phase]:
                if on_gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()

                optimizer.zero_grad()  # clear gradients of the previous batch
                # Gradients are only tracked while training; validation then
                # builds no autograd graph.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)  # forward pass
                    loss = criterion(outputs, labels)
                    if is_train:
                        loss.backward()  # back-propagate
                        optimizer.step()  # update the parameters
                preds = outputs.argmax(dim=1)  # predicted class per sample

                # Weight the batch-mean loss by the batch size so that the
                # division by the sample count below is a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            if is_train:
                # Step the schedule after the optimizer updates of this
                # epoch; stepping first would skip the initial learning rate.
                scheduler.step()

            epoch_loss = running_loss / sizes[phase]
            epoch_acc = running_corrects / sizes[phase]

            # Log one scalar per epoch for the loss and accuracy curves.
            if is_train:
                summary.add_scalar('data/trainloss', epoch_loss, epoch)
                summary.add_scalar('data/trainacc', epoch_acc, epoch)
            else:
                summary.add_scalar('data/valloss', epoch_loss, epoch)
                summary.add_scalar('data/valacc', epoch_acc, epoch)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

    # Dump the logged scalars to JSON for later use.
    summary.export_scalars_to_json("./all_scalars.json")
    summary.close()
    return model


# Notes on the data pipeline used below.
#
# torchvision.datasets.ImageFolder returns a list-like dataset (for example
# image_datasets['train'] or image_datasets['val'] below); every item is a
# tuple holding an image and its label.
#
# class torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False,
#     sampler=None, batch_sampler=None, num_workers=0, collate_fn=...,
#     pin_memory=False, drop_last=False, timeout=0, worker_init_fn=None)
#
#   dataset (Dataset): dataset to load the data from.
#   batch_size (int, optional): number of samples per batch.
#   shuffle (bool, optional): reshuffle the data at every epoch when True
#       (default: False).
#   sampler (Sampler, optional): strategy for drawing samples from the
#       dataset; yields one sample at a time.
#   batch_sampler (Sampler, optional): like sampler, but yields a batch of
#       indices at a time. Mutually exclusive with batch_size, shuffle,
#       sampler and drop_last.
#   num_workers (int, optional): number of subprocesses used for loading;
#       0 means the data is loaded in the main process (default: 0).
#   collate_fn (callable, optional): merges a list of samples into a
#       mini-batch.
#   pin_memory (bool, optional): if True, tensors are copied into CUDA
#       pinned memory before being returned.
#   drop_last (bool, optional): if True, drop the last incomplete batch when
#       the dataset size is not divisible by the batch size (default: False).
#   timeout (numeric, optional): if positive, the timeout for collecting a
#       batch from the workers; must be non-negative (default: 0).
#   worker_init_fn (callable, optional): if not None, called on each worker
#       subprocess with the worker id (an int in [0, num_workers - 1]) as
#       input, after seeding and before data loading (default: None).
if __name__ == '__main__':
    # Per-split preprocessing. RandomResizedCrop / Resize are the current
    # torchvision names of the former RandomSizedCrop / Scale transforms.
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(48),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ]),
        'val': transforms.Compose([
            transforms.Resize(64),
            transforms.CenterCrop(48),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ]),
    }
    data_dir = './Emotion_Recognition_File/train_val_data/'  # dataset root

    # Turn each split directory into a dataset of (image, label) items.
    image_datasets = {
        x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
        for x in ['train', 'val']
    }
    # Batch the images and labels into tensors; only the training split is
    # shuffled.
    dataloders = {
        x: torch.utils.data.DataLoader(image_datasets[x],
                                       batch_size=64,
                                       shuffle=(x == "train"),
                                       num_workers=0)
        for x in ['train', 'val']
    }
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

    # Use the GPU when one is available.
    use_gpu = torch.cuda.is_available()
    print("是否使用 GPU", use_gpu)

    modelclc = simpleconv3()
    # To try another model instead:
    #   model = models.resnet18(pretrained=True)
    #   num_ftrs = model.fc.in_features  # input width of the final FC layer
    #   model.fc = nn.Linear(num_ftrs, 2)
    print(modelclc)
    if use_gpu:
        modelclc = modelclc.cuda()

    criterion = nn.CrossEntropyLoss()  # loss function
    optimizer_ft = optim.SGD(modelclc.parameters(), lr=0.1, momentum=0.9)
    # Learning-rate schedule: multiply the rate by 0.1 every 100 epochs.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=100,
                                           gamma=0.1)

    modelclc = train_model(model=modelclc,
                           criterion=criterion,
                           optimizer=optimizer_ft,
                           scheduler=exp_lr_scheduler,
                           num_epochs=2,  # number of training epochs
                           loaders=dataloders,
                           sizes=dataset_sizes,
                           on_gpu=use_gpu,
                           summary=writer)

    if not os.path.exists("models"):
        os.mkdir('models')
    torch.save(modelclc.state_dict(), 'models/model.ckpt')

测试部分

# -*- coding: utf-8 -*-"""Created on Fri Mar 19 13:19:06 2021
@author: 12069"""
# coding:utf8
import sysimport numpy as npimport cv2import osimport dlib
import torchimport torch.nn as nnimport torch.optim as optimfrom torch.optim import lr_schedulerfrom torch.autograd import Variableimport torchvisionfrom torchvision import datasets, models, transformsimport timefrom PIL import Imageimport torch.nn.functional as F
import matplotlib.pyplot as pltimport warnings
warnings.filterwarnings('ignore')

# Path of the dlib facial-landmark model and the predictor loaded from it.
PREDICTOR_PATH = "./Emotion_Recognition_File/face_detect_model/shape_predictor_68_face_landmarks.dat"
predictor = dlib.shape_predictor(PREDICTOR_PATH)

# Face detector: the Haar cascade interface that ships with OpenCV.
cascade_path = './Emotion_Recognition_File/face_detect_model/haarcascade_frontalface_default.xml'
cascade = cv2.CascadeClassifier(cascade_path)
if cascade.empty():
    # CascadeClassifier does not raise on a missing or invalid file; fail
    # here instead of letting every later detection call fail.
    raise IOError("failed to load Haar cascade: " + cascade_path)

# Directory that receives the annotated output images.
os.makedirs("results", exist_ok=True)

import torch.nn as nn
import torch.nn.functional as F
class simpleconv3(nn.Module):
    """Small CNN: three conv + batch-norm stages and a three-layer classifier.

    The flatten step in ``forward`` hard-codes 48 * 5 * 5 features, which is
    what three unpadded 3x3, stride-2 convolutions produce for a 48x48 input
    (48 -> 23 -> 11 -> 5). The final layer emits 4 raw class scores (no
    softmax).
    """

    def __init__(self):
        super(simpleconv3, self).__init__()
        # Attribute names and creation order are kept stable so that saved
        # state_dict keys stay compatible.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12,
                               kernel_size=3, stride=2)
        self.bn1 = nn.BatchNorm2d(num_features=12)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=24,
                               kernel_size=3, stride=2)
        self.bn2 = nn.BatchNorm2d(num_features=24)
        self.conv3 = nn.Conv2d(in_channels=24, out_channels=48,
                               kernel_size=3, stride=2)
        self.bn3 = nn.BatchNorm2d(num_features=48)
        self.fc1 = nn.Linear(in_features=48 * 5 * 5, out_features=1200)
        self.fc2 = nn.Linear(in_features=1200, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=4)

    def forward(self, x):
        """Return the 4 class scores for a batch of images."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, bn in stages:
            x = F.relu(bn(conv(x)))
        flat = x.view(-1, 48 * 5 * 5)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


# Normalization, standardization and zero-centering.
#
# Normalization:
#   1) Map the data to decimals in (0, 1) or (-1, 1). This is mainly done for
#      convenience: data mapped into the 0-1 range is quicker and easier to
#      process.
#   2) Turn a dimensional expression into a dimensionless one, so that
#      indicators with different units or magnitudes can be compared and
#      weighted. Normalization simplifies computation by transforming a
#      dimensional expression into a pure, dimensionless quantity.
#
# Standardization: machine learning deals with many kinds of data, such as
#   audio signals and image pixel values, which may be high-dimensional.
#   After standardization every feature has mean 0 (the feature's mean over
#   the original data is subtracted) and standard deviation 1. The method is
#   widely used in machine-learning algorithms (e.g. support vector
#   machines, logistic regression and neural networks).
#
# Zero-centering: the mean becomes 0; nothing is required of the standard
#   deviation.
def standardization(data):
    """Scale every column of ``data`` to zero mean and unit standard deviation.

    Statistics are taken along axis 0. A constant column has a standard
    deviation of 0; it is mapped to zeros instead of NaN/inf.
    """
    mu = np.mean(data, axis=0)
    sigma = np.std(data, axis=0)
    # Dividing by 1 leaves the already-zero centred values of a constant
    # column at 0 and avoids a division by zero.
    safe_sigma = np.where(sigma == 0, 1.0, sigma)
    return (data - mu) / safe_sigma

def get_landmarks(im):
    """Return the landmarks of the first face detected in ``im``.

    The face is located with the module-level Haar ``cascade`` and its
    landmarks are computed by the module-level dlib ``predictor``.

    Returns:
        np.matrix with one ``[x, y]`` row per landmark.

    Raises:
        IndexError: if the cascade finds no face in ``im``.
    """
    rects = cascade.detectMultiScale(im, 1.3, 5)  # face detection
    if len(rects) == 0:
        # Same exception type as indexing the empty result, but with a
        # message that names the actual problem.
        raise IndexError("no face detected in image")
    # Top-left corner (x, y) and size (w, h) of the first detection.
    x, y, w, h = rects[0]
    rect = dlib.rectangle(int(x), int(y), int(x + w), int(y + h))
    return np.matrix([[p.x, p.y] for p in predictor(im, rect).parts()])

def annotate_landmarks(im, landmarks):
    """Return a copy of ``im`` with every landmark drawn and numbered.

    Args:
        im: image to annotate; it is not modified.
        landmarks: iterable of ``[x, y]`` rows, e.g. the np.matrix returned
            by ``get_landmarks`` or a plain (N, 2) array.

    Returns:
        The annotated copy of ``im``.
    """
    annotated = im.copy()
    for idx, point in enumerate(landmarks):
        # Flattening handles both np.matrix rows (shape (1, 2)) and ndarray
        # rows (shape (2,)); OpenCV wants plain Python ints for coordinates.
        px, py = np.asarray(point).ravel()[:2]
        pos = (int(px), int(py))
        cv2.putText(annotated,
                    str(idx),
                    pos,
                    fontFace=cv2.FONT_HERSHEY_SCRIPT_SIMPLEX,
                    fontScale=0.4,
                    color=(0, 0, 255))
        cv2.circle(annotated, pos, 3, color=(0, 255, 255))  # mark the point
    return annotated

testsize = 48  # side length in pixels of the square image fed to the network

# Preprocessing for inference: convert to a tensor, then normalise every
# channel with mean 0.5 and std 0.5.
data_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

# Build the network in evaluation mode and restore the trained weights,
# mapping every stored tensor to the CPU.
net = simpleconv3()
net.eval()
modelpath = "./models/model.ckpt"  # checkpoint path
net.load_state_dict(torch.load(modelpath, map_location='cpu'))
# Classify the mouth expression of every image in a folder, one file at a time.

# Class names in the order of the network's four outputs.
_EXPRESSION_LABELS = ('none', 'pout', 'smile', 'open')


def _crop_origin(start, extent, side, limit):
    """Return the origin of a ``side``-long window centred on a span.

    The span is ``[start, start + extent)``; the window is shifted so that it
    stays inside ``[0, limit)`` and is pinned to 0 when it is longer than
    ``limit``.
    """
    origin = start - (side - extent) / 2
    origin = min(origin, limit - side)  # do not run past the far edge
    return max(origin, 0)  # do not start before the near edge


img_path = "./Emotion_Recognition_File/find_face_img/"
imagepaths = os.listdir(img_path)  # image folder
for imagepath in imagepaths:
    im = cv2.imread(os.path.join(img_path, imagepath), 1)
    if im is None:
        continue  # not a readable image file

    rects = cascade.detectMultiScale(im, 1.3, 5)
    if len(rects) == 0:
        continue  # no face detected

    face_x, face_y, face_w, face_h = rects[0]
    rect = dlib.rectangle(int(face_x), int(face_y),
                          int(face_x + face_w), int(face_y + face_h))
    # Facial landmarks from the dlib predictor.
    landmarks = np.matrix([[p.x, p.y] for p in predictor(im, rect).parts()])

    # Smallest rectangle around the mouth, from its outermost landmarks.
    # The 68 landmarks are ordered: left ear 0 - chin - right ear 16,
    # left eyebrow (17-21), right eyebrow (22-26), nose top to bottom
    # (27-30), nostrils (31-35), left eye (36-41), right eye (42-47),
    # outer mouth contour (48-59), inner mouth contour (60-67).
    mouth = np.asarray(landmarks[48:68])  # 48..67 inclusive
    xmin, ymin = (int(v) for v in mouth.min(axis=0))
    xmax, ymax = (int(v) for v in mouth.max(axis=0))

    roiwidth = xmax - xmin
    roiheight = ymax - ymin

    # Square crop 1.5x the longer mouth side, centred on the mouth where the
    # image borders allow it.
    dstlen = 1.5 * max(roiwidth, roiheight)
    imagerows, imagecols = im.shape[:2]
    newx = _crop_origin(xmin, roiwidth, dstlen, imagecols)
    newy = _crop_origin(ymin, roiheight, dstlen, imagerows)

    roi = im[int(newy):int(newy + dstlen), int(newx):int(newx + dstlen), 0:3]
    if roi.size == 0:
        continue  # degenerate mouth box, nothing to classify
    roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
    roiresized = cv2.resize(roi,
                            (testsize, testsize)).astype(np.float32) / 255.0

    # unsqueeze(0) adds the batch dimension on axis 0.
    imgblob = data_transforms(roiresized).unsqueeze(0)

    # no_grad only takes effect as a context manager (or decorator): inside
    # it no gradients are tracked, which reduces memory use at inference.
    with torch.no_grad():
        predict = F.softmax(net(imgblob), dim=1)
    print(predict)
    index = int(np.argmax(predict.numpy()))

    # Draw the crop rectangle and the predicted label on a copy of the image.
    im_show = im.copy()
    pos_x = int(newx + dstlen)
    pos_y = int(newy + dstlen)
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.rectangle(im_show, (int(newx), int(newy)), (pos_x, pos_y),
                  (0, 255, 255), 2)
    cv2.putText(im_show, _EXPRESSION_LABELS[index], (pos_x, pos_y), font, 0.6,
                (0, 0, 255), 2)

    cv2.imwrite(os.path.join('results', imagepath), im_show)
    # Reverse the channel order: matplotlib expects RGB, OpenCV stores BGR.
    plt.imshow(im_show[:, :, ::-1])
    plt.show()

如果您发现该资源为电子书等存在侵权的资源或对该资源描述不正确等，可点击“私信”按钮向作者进行反馈；如作者无回复可进行平台仲裁，我们会在第一时间进行处理！

评价 0 条

神经网络初探

相关资源

关注我们