This post is divided into two sections: Summary and Implementation.
We are going to have an in-depth review of the ImageNet Classification with Deep Convolutional Neural Networks paper, which introduces the AlexNet architecture.
The implementation uses Keras as its framework. To see the full implementation, please refer to this repository. Also, if you want to read other "Summary and Implementation" posts, feel free to check them out on my blog.
DISCLAIMER:
AlexNet architecture:
AlexNet details:
AlexNet inputs:
The network expects RGB images of size 256 x 256; if the training/test images have a different size, they first need to be resized.
Each 256 x 256 RGB image is then cropped to 227 x 227 (cf. the Data Augmentation part); the paper mistakenly says 224 x 224. A minimal preprocessing sketch is shown below.
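As an illustration, here is a minimal preprocessing sketch. It is not taken from the repository: the function name and the center-crop choice are mine, whereas the paper actually uses random crops at training time and multiple crops at test time.

import cv2
import numpy as np

def resize_and_center_crop(image, resize_dim=256, crop_dim=227):
    """Resize an image to resize_dim x resize_dim, then take a crop_dim x crop_dim center crop."""
    resized = cv2.resize(image.astype(np.float32), (resize_dim, resize_dim))
    offset = (resize_dim - crop_dim) // 2
    return resized[offset:offset + crop_dim, offset:offset + crop_dim, :]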
AlexNet is prone to overfitting; to prevent that, the paper relies on data augmentation (random 227 x 227 crops and horizontal flips, plus PCA-based color jittering) and on dropout (with probability 0.5) applied to the first two fully-connected layers.
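For reference, here is a minimal sketch of what the fully-connected head looks like with the paper's dropout added. The function name is mine, its argument stands for the flattened pool5 output, and the AlexNet() implementation further down leaves these Dropout layers out.

from keras.layers import Dense, Dropout

def classifier_with_dropout(flatten):
    """Classifier head with dropout (p = 0.5) after fc6 and fc7, as described in the paper."""
    fc6 = Dense(4096, activation='relu', name='fc6')(flatten)
    drop6 = Dropout(0.5, name='drop6')(fc6)
    fc7 = Dense(4096, activation='relu', name='fc7')(drop6)
    drop7 = Dropout(0.5, name='drop7')(fc7)
    fc8 = Dense(1000, activation='softmax', name='fc8')(drop7)
    return fc8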
Remark:
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Lambda, Concatenate
from keras.models import Model

def grouped_conv(input_val, name, half, filters, kernel_size, strides=1, padding='valid'):
    """
    Performs a grouped convolution by splitting the input channels in two,
    convolving each half separately and concatenating the results.
    Parameters:
        - input_val: previous layer.
        - name: name of the convolution.
        - half: number of input channels fed to each convolution.
        - filters: number of filters for each convolution.
        - kernel_size: kernel size used for each convolution.
        - strides: stride. Default value is 1.
        - padding: 'valid' (default) or 'same'.
    Returns:
        - conv: concatenation of the two previous convolution layers.
    """
    # Split the input feature maps into two halves along the channel axis.
    input_val_1 = Lambda(lambda x: x[:, :, :, :half])(input_val)
    input_val_2 = Lambda(lambda x: x[:, :, :, half:])(input_val)

    # Convolve each half independently (mimicking the paper's two-GPU split).
    conv_1 = Conv2D(filters=filters,
                    kernel_size=kernel_size,
                    strides=strides,
                    padding=padding,
                    activation='relu',
                    name=name + '_1')(input_val_1)
    conv_2 = Conv2D(filters=filters,
                    kernel_size=kernel_size,
                    strides=strides,
                    padding=padding,
                    activation='relu',
                    name=name + '_2')(input_val_2)

    # Concatenate the two halves back along the channel axis.
    conv = Concatenate(name=name)([conv_1, conv_2])
    return conv
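In the AlexNet() definition below, grouped_conv is used for conv2, conv4 and conv5. For conv2, for example, half=48 and filters=128 mean that each branch sees 48 of pool1's 96 channels and produces 128 feature maps, giving 256 channels after concatenation, just as in the original two-GPU layout.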
def AlexNet():
    # Input: 227 x 227 RGB crops (see the inputs section above).
    x = Input((227, 227, 3))

    # conv1: 96 filters of 11 x 11 with stride 4 -> 55 x 55 x 96.
    conv1 = Conv2D(filters=96,
                   kernel_size=(11, 11),
                   strides=4,
                   activation='relu',
                   name='conv1')(x)
    # Overlapping max pooling (3 x 3, stride 2) -> 27 x 27 x 96.
    pool1 = MaxPooling2D(pool_size=3,
                         strides=2)(conv1)

    # conv2: grouped 5 x 5 convolution, 2 x 128 filters -> 27 x 27 x 256.
    conv2 = grouped_conv(input_val=pool1,
                         name='conv2',
                         half=48,
                         filters=128,
                         kernel_size=5,
                         padding='same')
    # -> 13 x 13 x 256.
    pool2 = MaxPooling2D(pool_size=3,
                         strides=2)(conv2)

    # conv3: 384 filters of 3 x 3 -> 13 x 13 x 384.
    conv3 = Conv2D(filters=384,
                   kernel_size=(3, 3),
                   padding='same',
                   activation='relu',
                   name='conv3')(pool2)

    # conv4: grouped 3 x 3 convolution, 2 x 192 filters -> 13 x 13 x 384.
    conv4 = grouped_conv(input_val=conv3,
                         name='conv4',
                         half=192,
                         filters=192,
                         kernel_size=3,
                         padding='same')

    # conv5: grouped 3 x 3 convolution, 2 x 128 filters -> 13 x 13 x 256.
    conv5 = grouped_conv(input_val=conv4,
                         name='conv5',
                         half=192,
                         filters=128,
                         kernel_size=3,
                         padding='same')
    # -> 6 x 6 x 256.
    pool5 = MaxPooling2D(pool_size=3,
                         strides=2)(conv5)

    # Classifier: two fully-connected layers of 4096 units and a 1000-way softmax.
    flatten = Flatten()(pool5)
    fc6 = Dense(4096, activation='relu', name='fc6')(flatten)
    fc7 = Dense(4096, activation='relu', name='fc7')(fc6)
    fc8 = Dense(1000, activation='softmax', name='fc8')(fc7)

    model = Model(inputs=x, outputs=fc8)
    return model
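A quick sanity check is to instantiate the network and print its summary; a minimal sketch is shown below (the weights file name is hypothetical). The inference code that follows additionally assumes that pretrained weights have been loaded into model and that imgs (a few test images read with OpenCV, i.e. in BGR order) and class_names (the 1000 ImageNet labels) are defined elsewhere in the notebook.

model = AlexNet()
model.summary()  # should report roughly 60 million trainable parameters
# model.load_weights('alexnet_weights.h5')  # hypothetical weights file, not provided here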
import numpy as np
import cv2
import matplotlib.pyplot as plt

# Mean BGR pixel values of the ImageNet training set, subtracted from every image
# (the images are assumed to have been read with cv2.imread, i.e. in BGR order).
imagenet_mean = np.array([104., 117., 124.], dtype=np.float32)

fig2 = plt.figure(figsize=(30, 10))

# imgs and class_names are assumed to be defined earlier, and model to hold pretrained weights.
for i, image in enumerate(imgs):
    # Resize to the network's input size, subtract the mean and add a batch dimension.
    img = cv2.resize(image.astype(np.float32), (227, 227))
    img -= imagenet_mean
    img = img.reshape((1, 227, 227, 3))

    # Forward pass: probabilities over the 1000 classes.
    probs = model.predict(img)
    class_name = class_names[np.argmax(probs)]

    # Display the image with its most likely class.
    fig2.add_subplot(1, 4, i + 1)
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title("Class: " + class_name + ", probability: %.4f" % probs[0, np.argmax(probs)], fontsize=13)
    plt.axis('off')

    # List the five most probable classes below the image.
    plt.text(0, 240, 'Top-5 predictions:')
    x, y = 10, 260
    for idx in np.argsort(probs)[0][-5:][::-1]:
        plt.text(x, y, s='- {}, probability: {:.4f}'.format(class_names[idx], probs[0, idx]), fontsize=12)
        y += 20

plt.show()