Image Classification Based on the AlexNet Model

Problem statement: image classification based on the AlexNet model.
The AlexNet architecture consists of an input layer, 5 convolutional layers, 3 fully connected layers, and an output layer; 3 of the convolutional layers are followed by max pooling.
Key features of the AlexNet architecture:
1. ReLU activation: compared with sigmoid or tanh, ReLU is a non-saturating function and therefore trains faster; its piecewise-linear form still gives the network a non-linear representation, which suits deeper networks.
2. Local response normalization (LRN): normalization is applied after some of the convolutional layers to improve network performance.
3. Overlapping pooling: in traditional pooling the windows do not overlap and each window is computed independently. AlexNet uses overlapping pooling windows, which helps reduce overfitting compared with non-overlapping windows.
4. Dropout: AlexNet also randomly drops activations during training to further reduce overfitting. (A minimal sketch of these four ingredients follows below.)
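The sketch below illustrates these four ideas in tf.compat.v1. The placeholder x and its shape are illustrative assumptions, not taken from the code later in this post.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# x stands for an arbitrary feature map (batch, height, width, channels); the shape is an assumption
x = tf.placeholder(tf.float32, [None, 200, 200, 16])
# 1. Non-saturating ReLU activation
relu_out = tf.nn.relu(x)
# 3. Overlapping pooling: a 3x3 window moved with stride 2, so neighbouring windows overlap
pool_out = tf.nn.max_pool(relu_out, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
# 2. Local response normalization across adjacent channels
lrn_out = tf.nn.lrn(pool_out, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
# 4. Dropout: randomly zero 50% of the activations during training
drop_out = tf.nn.dropout(lrn_out, rate=0.5)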
Implementation (note that the code below is a simplified variant of AlexNet: two convolutional layers and two fully connected layers rather than the full depth):
input_data.py: handles the dataset. The dataset contains two classes of images, one named sun.*.jpg and the other cac.*.jpg; this module reads both classes.
input_data.py
import os
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
def get_files(file_dir):
    sun = []
    label_sun = []
    cac = []
    label_cac = []
    for file in os.listdir(file_dir):
        name = file.split(sep='.')
        if 'sun' in name[0]:
            sun.append(file_dir + file)
            label_sun.append(0)
        elif 'cac' in name[0]:
            cac.append(file_dir + file)
            label_cac.append(1)
    image_list = np.hstack((sun, cac))
    label_list = np.hstack((label_sun, label_cac))
    # Put the images and labels into one temp array, shuffle it, then take them back out
    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    # Shuffle the order
    np.random.shuffle(temp)
    # The first column becomes the image list, the second column the label list
    image_list = list(temp[:, 0])
    label_list = list(temp[:, 1])
    label_list = [int(i) for i in label_list]
    return image_list, label_list
# image_W, image_H specify the target image size; batch_size is the number of images per batch;
# capacity is the maximum number of elements held in the queue
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    # Convert the data into types TensorFlow can work with
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)
    # Put image and label into an input queue
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    # Read the raw file contents of the image
    image_contents = tf.read_file(input_queue[0])
    # Decode the image; channels=3 means a colour (R, G, B) image, 1 would mean greyscale
    image = tf.image.decode_jpeg(image_contents, channels=3)
    # Crop or pad the image around its centre to the specified image_W x image_H
    image = tf.image.resize_image_with_crop_or_pad(image, image_W, image_H)
    # Standardize the data: subtract the mean and divide by the standard deviation
    image = tf.image.per_image_standardization(image)
    # Generate batches; num_threads depends on the machine, capacity is the queue size
    # (tf.train.shuffle_batch could be used instead to shuffle the order)
    image_batch, label_batch = tf.train.batch([image, label], batch_size=batch_size,
                                              num_threads=4, capacity=capacity)
    # Reshape label_batch
    label_batch = tf.reshape(label_batch, [batch_size])
    # Cast the image batch to float32
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch
model.py: defines the AlexNet network architecture.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
def inference(images, batch_size, n_classes):
    # conv1: 3x3 kernels, input depth 3 (RGB), 16 output feature maps
    with tf.variable_scope('conv1') as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 3, 16],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[16],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(images, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
    # pool1 and norm1
    with tf.variable_scope('pooling1_lrn') as scope:
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
    # conv2: 3x3 kernels, input depth 16, 16 output feature maps
    with tf.variable_scope('conv2') as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 16, 16],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[16],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name='conv2')
    # pool2 and norm2
    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='SAME', name='pooling2')
    # local3: first fully connected layer
    with tf.variable_scope('local3') as scope:
        reshape = tf.reshape(pool2, shape=[batch_size, -1])
        dim = reshape.get_shape()[1].value
        weights = tf.get_variable('weights',
                                  shape=[dim, 128],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[128],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
    # local4: second fully connected layer
    with tf.variable_scope('local4') as scope:
        weights = tf.get_variable('weights',
                                  shape=[128, 128],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[128],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')
    # softmax_linear: linear layer producing the class logits
    with tf.variable_scope('softmax_linear') as scope:
        weights = tf.get_variable('softmax_linear',
                                  shape=[128, n_classes],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[n_classes],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name='softmax_linear')
    return softmax_linear
def losses(logits, labels):
    with tf.variable_scope('loss') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='xentropy_per_example')
        loss = tf.reduce_mean(cross_entropy, name='loss')
        tf.summary.scalar(scope.name + '/loss', loss)
    return loss
def trainning(loss, learning_rate):
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op
def evaluation(logits, labels):
    with tf.variable_scope('accuracy') as scope:
        correct = tf.nn.in_top_k(logits, labels, 1)
        correct = tf.cast(correct, tf.float16)
        accuracy = tf.reduce_mean(correct)
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy
train.py: defines the training setup, including the number of image classes, the image size, the batch size, the learning rate, the number of training steps, and how training progress is printed.
import os
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import input_data
import model
N_CLASSES = 2  # 2 output neurons, one per class
IMG_W = 200  # resize the images; training is slow if they are too large
IMG_H = 200
BATCH_SIZE = 4  # number of images per batch
CAPACITY = 256
MAX_STEP = 1000  # number of training steps
learning_rate = 0.0001  # learning rate; starting with learning_rate <= 0.0001 is recommended
def run_training():
    # Directory containing the training set
    train_dir = 'F:/deep learning/net/train/'
    # logs_train_dir stores the data produced during training, for viewing in TensorBoard
    logs_train_dir = 'F:/deep learning/net/save/model.ckpt/'
    # Get the image and label lists
    train, train_label = input_data.get_files(train_dir)
    # Generate batches
    train_batch, train_label_batch = input_data.get_batch(train,
                                                          train_label,
                                                          IMG_W,
                                                          IMG_H,
                                                          BATCH_SIZE,
                                                          CAPACITY)
    # Build the model
    train_logits = model.inference(train_batch, BATCH_SIZE, N_CLASSES)
    # Get the loss
    train_loss = model.losses(train_logits, train_label_batch)
    # Training op
    train_op = model.trainning(train_loss, learning_rate)
    # Get the accuracy
    train_acc = model.evaluation(train_logits, train_label_batch)
    # Merge summaries
    summary_op = tf.summary.merge_all()
    sess = tf.Session()
    # Write summaries
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            _, tra_loss, tra_acc = sess.run([train_op, train_loss, train_acc])
            if step % 50 == 0:
                print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc * 100.0))
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                # Save the model every 2000 steps to checkpoint_path
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()
        coord.join(threads)
        sess.close()

# train
run_training()
test.py: defines how the trained network is tested.
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import os
import cv2
import input_data
import model
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
# Pick one image at random from the given directory
def get_one_image(train):
    files = os.listdir(train)
    n = len(files)
    ind = np.random.randint(0, n)
    img_dir = os.path.join(train, files[ind])
    image = Image.open(img_dir)
    plt.imshow(image)
    plt.show()
    image = image.resize([200, 200])
    image = np.array(image)
    # image = cv2.add(img2, image)
    # plt.imshow(image)
    # plt.show()
    return image
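The original listing stops here and does not include the routine that actually feeds the selected image through the trained network. Below is a minimal sketch of such a routine, assuming the checkpoint directory used in train.py; the function name evaluate_one_image and the test_dir path are illustrative assumptions, not part of the original code.
# Hypothetical completion: run the trained model on one randomly chosen image.
def evaluate_one_image():
    test_dir = 'F:/deep learning/net/train/'  # assumed directory of images to test
    image_array = get_one_image(test_dir)
    with tf.Graph().as_default():
        # Batch size of 1: standardize the single image and reshape it to [1, 200, 200, 3]
        image = tf.cast(image_array, tf.float32)
        image = tf.image.per_image_standardization(image)
        image = tf.reshape(image, [1, 200, 200, 3])
        logit = model.inference(image, 1, 2)
        logit = tf.nn.softmax(logit)
        logs_train_dir = 'F:/deep learning/net/save/model.ckpt/'
        saver = tf.train.Saver()
        with tf.Session() as sess:
            # Restore the latest checkpoint saved by train.py
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                prediction = sess.run(logit)
                max_index = np.argmax(prediction)
                if max_index == 0:
                    print('This is a sun image with probability %.6f' % prediction[0][0])
                else:
                    print('This is a cac image with probability %.6f' % prediction[0][1])
            else:
                print('No checkpoint file found')

evaluate_one_image()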
