读书笔记:《Practical Python and OpenCV》

《Practical Python and OpenCV》读书札记。

1. Load,Display and Save an Image

1
2
3
4
5
6
7
8
9
import cv2
# Load the image and show some basic information on it
image = cv2.imread("Path/to/Image")
# Show the image and wait for a keypress
cv2.imshow("Image", image)
cv2.waitKey(0)
# Save the image -- OpenCV handles converting filetypes
# automatically
cv2.imwrite("newimage.jpg", image)

2. cv2

图像上添加线条及形状

  • cv2.line(image, start_point, stop_point, color, thickness)
  • cv2.rectangle(image,top_left, bottom_right, color, thickness)
    • thickness 为负数,填充形状
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Initialize our canvas as a 300x300 with 3 channels,
# Red, Green, and Blue, with a black background
canvas = np.zeros((300, 300, 3), dtype = "uint8")
# Draw a green line from the top-left corner of our canvas
# to the bottom-right
green = (0, 255, 0)
cv2.line(canvas, (0, 0), (300, 300), green)
# Now, draw a 3 pixel thick red line from the top-right
# corner to the bottom-left
red = (0, 0, 255)
cv2.line(canvas, (300, 0), (0, 300), red, 3)
# Draw a green 50x50 pixel square, starting at 10x10 and
# ending at 60x60
cv2.rectangle(canvas, (10, 10), (60, 60), green)
# Draw another rectangle, this time we'll make it red and
# 5 pixels thick
cv2.rectangle(canvas, (50, 200), (200, 225), red, 5)
# Let's draw one last rectangle: blue and filled in
blue = (255, 0, 0)
cv2.rectangle(canvas, (200, 50), (225, 125), blue, -1)
cv2.imshow("Canvas", canvas)
cv2.waitKey(0)

Line and Rectangle

添加圆形

  • cv2.circle(image, (centerX, centerY), r, color,thickness)
1
2
3
4
5
6
7
8
9
10
# Reset our canvas and draw a series of concentric white circles at
# the center of the canvas with increasing radii - from 0 to 150
# pixels in 25-pixel steps.
canvas = np.zeros((300, 300, 3), dtype="uint8")
# shape is (height, width, channels): shape[1] is the x extent,
# shape[0] is the y extent.
(centerX, centerY) = (canvas.shape[1] // 2, canvas.shape[0] // 2)
white = (255, 255, 255)
for r in range(0, 175, 25):
    # One circle outline per radius (default thickness is 1 pixel).
    cv2.circle(canvas, (centerX, centerY), r, white)
cv2.imshow("Canvas", canvas)
cv2.waitKey(0)

Circles

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Let's go crazy and draw 25 random circles
for i in range(0, 25):
    # Randomly generate a radius between 5 and 200, a random BGR
    # color, and a random center point on the 300x300 canvas.
    radius = np.random.randint(5, high=200)
    # .tolist() converts the numpy array to a plain Python list,
    # which is what cv2 expects for a color argument.
    color = np.random.randint(0, high=256, size=(3,)).tolist()
    pt = np.random.randint(0, high=300, size=(2,))

    # Draw the circle; thickness -1 fills the shape.
    cv2.circle(canvas, tuple(pt), radius, color, -1)

# Show our masterpiece
cv2.imshow("Canvas", canvas)
cv2.waitKey(0)

RandomCircles

3. Image Processing

  1. 平移

    • cv2.warpAffine()
    • imutil
    1
    2
    3
    4
    5
    6
    7
    def translate(image, x, y):
        """Shift an image by (x, y) pixels.

        Positive x shifts right, positive y shifts down.
        """
        # Build the 2x3 affine translation matrix and apply it; the
        # output size (width, height) is taken from the input image.
        M = np.float32([[1, 0, x], [0, 1, y]])
        shifted = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))

        # Return the translated image
        return shifted
  2. 旋转

    • cv2.getRotationMatrix2D
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    def rotate(image, angle, center=None, scale=1.0):
        """Rotate an image by `angle` degrees about `center`,
        optionally scaling by `scale`."""
        # Grab the dimensions of the image
        (h, w) = image.shape[:2]

        # Default the rotation center to the image center
        if center is None:
            center = (w / 2, h / 2)

        # Build the 2x3 rotation matrix and apply it; the output keeps
        # the original (w, h) size, so corners may be clipped.
        M = cv2.getRotationMatrix2D(center, angle, scale)
        rotated = cv2.warpAffine(image, M, (w, h))

        # Return the rotated image
        return rotated
  3. 缩放

    • cv2.resize
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
        """Resize an image while preserving its aspect ratio.

        Give exactly one of `width`/`height`; the other dimension is
        computed from the original aspect ratio. If both are None the
        image is returned unchanged.
        """
        dim = None
        (h, w) = image.shape[:2]

        # Nothing to do if no target dimension was requested
        if width is None and height is None:
            return image

        if width is None:
            # Scale by the requested height
            r = height / float(h)
            dim = (int(w * r), height)
        else:
            # Scale by the requested width
            r = width / float(w)
            dim = (width, int(h * r))

        # resize the image
        resized = cv2.resize(image, dim, interpolation=inter)

        # return the resized image
        return resized
  4. 反转

    • cv2.flip(image, num)
    • num=1,水平翻转;num=0,垂直翻转;num为负,水平和垂直同时翻转
  5. bitwise

    1
    2
    # AND: a pixel is on only where BOTH input images are on.
    bitwiseAnd = cv2.bitwise_and(rectangle, circle)
    cv2.imshow("AND", bitwiseAnd)
    cv2.waitKey(0)
  6. MASKING

    1
    2
    3
    4
    5
    # Build a single-channel mask the same size as the image: a filled
    # white circle of radius 100 centered at (cX, cY), zero elsewhere.
    mask = np.zeros(image.shape[:2], dtype="uint8")
    cv2.circle(mask, (cX, cY), 100, 255, -1)
    # Only pixels where the mask is non-zero survive the AND.
    masked = cv2.bitwise_and(image, image, mask=mask)
    cv2.imshow("Mask", mask)
    cv2.imshow("Mask Applied to Image", masked)
    cv2.waitKey(0)
  7. 色彩空间变换

    1
    2
    3
    4
    5
    6
    7
    8
    9
    # Load the image and show it
    image = cv2.imread(args["image"])
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Convert the image to the HSV (Hue, Saturation, Value)
    # color spaces
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # Convert the image to the L*a*b* color spaces
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
  8. 平滑/模糊(blurring)

    1
    2
    3
    4
    5
    6
    7
    8
    # Averaging Blurring
    cv2.blur(image, (3, 3))
    # Gaussian Blurring
    cv2.GaussianBlur(image, (3, 3), 0)
    # Median Blurring
    cv2.medianBlur(image, 3)
    # Bilateral Blurring
    cv2.bilateralFilter(image, 5, 21, 21)

Traditionally, the median blur method has been most effective when removing salt-and-pepper noise.

  1. Threshold

    1
    2
    3
    4
    (T, thresh) = cv2.threshold(blurred, 155, 255, cv2.THRESH_BINARY) 
    cv2.imshow("Threshold Binary", thresh)
    (T, threshInv) = cv2.threshold(blurred, 155, 255, cv2. THRESH_BINARY_INV)
    cv2.bitwise_and(image, image, mask = threshInv)
  2. 边缘检测

    1
    2
    3
    4
    image = cv2.imread(img)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.GaussianBlur(image, (5,5), 0)
    canny = cv2.Canny(image, 30 ,150)

canny

  1. 轮廓检测

    1
    2
    3
    4
    5
    6
    7
    8
    image = cv2.imread(img)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.GaussianBlur(image, (11,11), 0)
    canny = cv2.Canny(image, 30 ,150)
    (_, cnts, _) = cv2.findContours(canny.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print("I count {} coins in this image".format(len(cnts)))
    coins = image.copy()
    cv2.drawContours(coins, cnts, -1, (0, 255, 255), 2)

    contours

    • 从图像中将对象扣取
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    # Now, let's loop over each contour
    for (i, c) in enumerate(cnts):
        # Compute the 'bounding box' for the contour: the upright
        # rectangle that fully encloses it.
        (x, y, w, h) = cv2.boundingRect(c)

        # Extract the coin region via array slices (rows = y, cols = x)
        print("Coin #{}".format(i + 1))
        coin = image[y:y + h, x:x + w]
        cv2.imshow("Coin", coin)

        # Build a mask from the minimum enclosing circle of the contour
        # so only the round coin, not the rectangular background, is kept.
        mask = np.zeros(image.shape[:2], dtype="uint8")
        ((centerX, centerY), radius) = cv2.minEnclosingCircle(c)
        cv2.circle(mask, (int(centerX), int(centerY)), int(radius), 255, -1)
        mask = mask[y:y + h, x:x + w]
        cv2.imshow("Masked Coin", cv2.bitwise_and(coin, coin, mask=mask))
        cv2.waitKey(0)

Tips

  1. 为解决python 2.7和python 3中print函数不兼容的问题,可通过导入如下命令解决在python 2.7环境运行python 3中print函数不兼容的问题:

    1
    from __future__ import print_function
  2. OpenCV存储RGB信息采用的逆向存储方式,即为BGR; matplotlib.plot中为RGB

  3. np.random

Method Description
rand(d0, d1, …, dn) Random values in a given shape.
randn(d0, d1, …, dn) Return a sample (or samples) from the “standard normal” distribution.
randint(low[, high, size, dtype]) Return random integers from low (inclusive) to high (exclusive).
random_integers(low[, high, size]) Random integers of type np.int between low and high, inclusive.
random_sample([size]) Return random floats in the half-open interval [0.0, 1.0).
random([size]) Return random floats in the half-open interval [0.0, 1.0).产生随机矩阵,如random.random([2,3])产生一个2x3维的随机数
ranf([size]) Return random floats in the half-open interval [0.0, 1.0).
sample([size]) Return random floats in the half-open interval [0.0, 1.0).
choice(a[, size, replace, p]) Generates a random sample from a given 1-D array
bytes(length) Return random bytes.

Deep Learning for Computer Vision

本文记录深度学习书籍《Deep Learning for Computer Vision with Python》的读书笔记。

@toc

背景

  • 深度学习拥有60多年历史,虽然曾经采用过不同的名称和不同的主导技术:“deep learning” has existed since the 1940s undergoing various name changes, including cybernetics, connectionism, and the most familiar, Artificial Neural Networks (ANNs).
  • 神经网络的普适定律:Further research demonstrated that neural networks are universal approximators , capable of approximating any continuous function (but placing no guarantee on whether or not the network can actually learn the parameters required to represent a function).
  • Classic machine learning algorithms for unsupervised learning include Principle Component Analysis (PCA) and k-means clustering. Specific to neural networks, we see Autoencoders, Self-Organizing Maps (SOMs), and Adaptive Resonance Theory applied to unsupervised learning.
  • Popular choices for semisupervised learning include label spreading, label propagation, ladder networks, and co-learning/co-training.

Image and Pixels

  • Pixels are represented in two ways:
    • Grayscale: Each pixel is a scalar value between 0 and 255.(0 for “Black” and 255 for “White”),0—>255 dark —> light
    • Color: RGB color space, (R,G,B), Each Red, Green, and Blue channel can have values defined in the range [0,255] for a total of 256 “shades”, where 0 indicates no representation and 255 demonstrates full representation.
  • Given that the pixel value only needs to be in the range [0,255], we normally use 8-bit unsigned integers to represent the intensity.

Images as Numpy Arrays

  • (height, width, depth) 表示

height 排第一的主要原因是由于矩阵表示形式中,一般把行放在前面,而图像中height大小表征了行的数目。

1
2
3
4
5
6
import cv2

# Load the image and print its (height, width, depth) shape
# (fixed: imread and print were fused onto one line)
image = cv2.imread("example.png")
print(image.shape)
cv2.imshow("Image", image)
cv2.waitKey(0)
## Access an individual pixel value
(b, g, r) = image[20, 100]  # accesses pixel at x=100, y=20
  1. 取像素y在x前面,还是由于矩阵的表示形式;
  2. RGB顺序反的,这是由于OpenCV历史原因导致的表示形式差异: Because the BGR ordering was popular among camera manufacturers and other software developers at the time.

Others

aspect ratio: the ratio of the width to the height of the image.

神经网络模型一般都是固定输入,比如32×32, 64×64, 224×224, 227×227, 256×256, and 299×299. 需要对不同大小的图像进行reshape操作,For some datasets you can simply ignore the aspect ratio and squish, distort, and compress your images prior to feeding them through your network. On other datasets, it’s advantageous to preprocess them further by resizing along the shortest dimension and then cropping the center.

Image Classification

图像分类和图像理解是当今技术视觉领域最火的课题。

定义

图像分类: the task of assigning a label to an image from a predefined set of categories.

图像分类的过程是学习图片中的“underlying patterns”

Semantic Gap: the difference between how a human perceives the contents of an image versus how an image can be represented in a way a computer can understand the process.

挑战

Challenge for Image Classification

数据集(TODO)

  1. MNIST

    • 目标: 完成0-9手写字符的识别

    • 说明:

      • NIST代表National Institute of Standards and Technology;M代表Modified
      • 深度学习的Hello World
      • 包含60,000训练样本,10,000测试样本,每个样本为28x28的灰度图像
    • 目前准确度: >99%

    • 获取地址: http://yann.lecun.com/exdb/mnist/

      MNIST

  2. Fashion-MNIST

    • 目标: 完成10种不同衣服的识别

    • 说明:

      • 根据MNIST设计的新的数据集,难度比MNIST略高
      • 包含60,000训练样本,10,000测试样本,每个样本为28x28的灰度图像
    • 目前准确度: >95%

    • 获取地址: https://github.com/zalandoresearch/fashion-mnist

      Fashion-MNIST

  3. CIFAR-10

  4. Animals: Dogs,Cat, Pandas

  5. Flowers-17

  6. Caltech-101

  7. Tiny ImageNet 200

  8. Adience

  9. ImageNet

  10. 表情识别(是否笑脸)

    • 说明:
    • 共计13165张灰度图片,每张图片大小为64x64
    • 分为笑脸和非笑脸两类,其中笑脸3690张,非笑脸9475张(数据不平衡)
    • 获取地址:https://github.com/hromi/SMILEsmileD
    • 另外fer2013提供了更多表情的训练用数据集

Smile Datasets

  1. 性别和年龄数据集
  1. Indoor CVPR

  2. Stanford Cars

神经网络基础

优化算法(TODO,整合到单独Note)

  • Chapter 8

Regularization (TODO, 整合到单独Note)

  • Chapter 9
  • chapter 10,激活函数,perception

为什么验证损失函数值有时候小于训练损失函数

这可能是有几方面原因导致的,或多方面原因综合作用的结果,主要的原因包括:

  1. 训练集和验证集分布不均,导致训练集数据难度大,验证集简单数据分布比例大;
  2. 数据放大本身形成了一种规则化,降低了训练集的训练结果;(这本身是规则化的目标,降低在训练集的表现,提升泛化性能)
  3. 训练时间或轮数不够;

关于学习率

  • keras中提供了decay参数来调节学习率的变化情况:

    1
    opt = SGD(lr=0.01, decay=0.01 / 40, momentum=0.9, nesterov=True)

    使用公式:

  • 另一种学习率为阶梯学习率:ctrl + c

    Keras提供一个类:LearningrateScheduler来配置自定义的学习率函数

    比如:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    def step_decay(epoch):
        """Step-based learning-rate schedule for Keras.

        Starts at 0.01 and multiplies the rate by 0.25 every 5 epochs.
        Returns the rate to use for `epoch` (0-based) as a float.
        """
        # initialize the base learning rate, drop factor, and the
        # number of epochs between drops
        initAlpha = 0.01
        factor = 0.25
        dropEvery = 5

        # compute learning rate for the current epoch
        alpha = initAlpha * (factor ** np.floor((1 + epoch) / dropEvery))

        # return the learning rate
        return float(alpha)

    ##定义callback
    callbacks = [LearningRateScheduler(step_decay)]

当定义了学习率之后,SGD中声明的配置信息将被忽略

网络模型

VGG

  • 所有的卷积层使用同一种卷积核:3X3
  • 堆积多个CONV=>RELU层再进行一次POOL操作

MNIST

Researchers tend to use the MNIST dataset as a benchmark to evaluate new classification algorithms. If their methods cannot obtain > 95% classification accuracy, then there is either a flaw in (1) the logic of the algorithm or (2) the implementation itself.

Case Study

使用OpenCV的Haar cascade 算法进行人脸检测,提取人脸的ROI(Region of interest), 通过一个卷积神经网络进行表情识别;

可以结合Github开源的表情识别代码一起研究

  • 路径处理 os.path.sep: 提取路径分隔符
  • 数据不平衡的处理,可以考虑不同分类的权重,在训练时通过赋权调整平衡性,代码如下:
1
2
3
4
5
6
7
8
# Handle data imbalance
# account for skew in the labeled data
classTotals = labels.sum(axis=0)
classWeight = classTotals.max() / classTotals

## When training
H = model.fit(trainX, trainY, validation_data=(testX, testY),
class_weight=classWeight, batch_size=64, epochs=15, verbose=1)

手写字的预处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def preprocess(image, width, height):
    """Resize `image` to exactly (width, height) while preserving its
    aspect ratio, padding the shorter dimension with replicated border
    pixels."""
    # Grab the dimensions of the image
    (h, w) = image.shape[:2]
    # Resize along the larger dimension so the image fits in the target box
    if w > h:
        image = imutils.resize(image, width=width)  # fixed: was 'iamge'
    else:
        image = imutils.resize(image, height=height)
    # Padding needed on each side to reach the target dimensions
    padW = int((width - image.shape[1]) / 2.0)
    padH = int((height - image.shape[0]) / 2.0)

    # Pad, then resize one final time to absorb any rounding error so
    # the output is exactly (width, height).
    image = cv2.copyMakeBorder(image, padH, padH, padW, padW, cv2.BORDER_REPLICATE)
    # fixed: result was assigned to misspelled 'iamge' and discarded
    image = cv2.resize(image, (width, height))

    return image

# Load the image, convert it to grayscale, and pad the border so digits
# touching the edge still get a full bounding box.
image = cv2.imread(img)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.copyMakeBorder(gray, 20, 20, 20, 20, cv2.BORDER_REPLICATE)
# threshold the image to reveal the digits (Otsu picks the threshold)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
# find contours in the image, keeping only the four largest ones,
# then sort them left-to-right
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if imutils.is_cv2() else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:4]
cnts = contours.sort_contours(cnts)[0]

# initialize the output image as a "grayscale" image with 3
# channels along with the output predictions
output = cv2.merge([gray] * 3)

for c in cnts:
    # compute the bounding box for the contour then extract the digit
    (x, y, w, h) = cv2.boundingRect(c)
    roi = gray[y - 5:y + h + 5, x - 5:x + w + 5]
    roi = preprocess(roi, 28, 28)
    # scale to [0, 1] and add a batch dimension for the network
    roi = np.expand_dims(img_to_array(roi), axis=0) / 255.0
    #pred = model.predict(roi).argmax(axis=1)[0] + 1
    #predictions.append(str(pred))
    # draw the prediction on the output image
    cv2.rectangle(output, (x - 2, y - 2), (x + w + 4, y + h + 4), (0, 255, 0), 1)
    #cv2.putText(output, str(pred), (x - 5, y - 5),cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2)

# show the output image
#print("[INFO] captcha: {}".format("".join(predictions)))
plt.imshow(output)
#cv2.waitKey()

Useful Functions

图像预处理及加载模板

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
'''
File1: Preprocessor
'''
import cv2

class SimplePreporcessor:
    """Resize images to a fixed (width, height), ignoring aspect ratio.

    NOTE(review): the 'Preporcessor' spelling is kept because later code
    in these notes instantiates the class by this exact name.
    """

    def __init__(self, width, height, inter=cv2.INTER_AREA):  # fixed: missing ':'
        # store the target image width, height, and interpolation
        # method used when resizing
        self.width = width
        self.height = height
        self.inter = inter

    def preprocess(self, image):
        # resize the image to a fixed size, ignoring the aspect ratio
        return cv2.resize(image, (self.width, self.height), interpolation=self.inter)

'''
Data Loader
'''
# import the necessary packages
import numpy as np
import cv2
import os

class SimpleDatasetLoader:
    """Load images from disk, applying an optional chain of preprocessors.

    Assumes paths look like /path/to/dataset/{class}/{image}.jpg, so the
    class label is the next-to-last path component.
    """

    def __init__(self, preprocessors=None):
        # if the preprocessors are None, initialize them as an empty list
        self.preprocessors = preprocessors if preprocessors is not None else []

    def load(self, imagePaths, verbose=-1):
        data = []
        labels = []

        for (i, imagePath) in enumerate(imagePaths):
            # load the image and extract the class label assuming
            # that our path has the following format:
            # /path/to/dataset/{class}/{image}.jpg
            image = cv2.imread(imagePath)
            label = imagePath.split(os.path.sep)[-2]

            # apply each preprocessor in order
            for p in self.preprocessors:
                image = p.preprocess(image)

            data.append(image)
            labels.append(label)

            # show an update every `verbose` images
            if verbose > 0 and i > 0 and (i + 1) % verbose == 0:
                # fixed: .format() was called on print()'s return value,
                # which raised AttributeError; also "[INFON]" typo
                print("[INFO] processed {}/{}".format(i + 1, len(imagePaths)))

        # return a tuple of the data and labels
        return (np.array(data), np.array(labels))
'''
Main
'''
# import the necessary packages
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils import paths
import argparse

# construct the argument parser and parse the arguments
# (fixed: the ArgumentParser() call had been swallowed into this comment)
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, help="path to input dataset")
ap.add_argument("-k", "--neighbors", type=int, default=1, help="# of nearest neighbors for classification")
ap.add_argument("-j", "--jobs", type=int, default=-1, help="# of jobs for k-NN distance (-1 uses all available cores)")
args = vars(ap.parse_args())

print("[INFO] loading images ...")
imagePaths = list(paths.list_images(args["dataset"]))

# initialize the image preprocessor, load the dataset from disk,
# and reshape the data matrix
sp = SimplePreporcessor(32, 32)
sdl = SimpleDatasetLoader(preprocessors=[sp])

(data, labels) = sdl.load(imagePaths, verbose=500)
# flatten each 32x32x3 image into a 3072-dim vector for use in k-NN
data = data.reshape((data.shape[0], 32 * 32 * 3))

# fixed: .format() must be inside the print() argument
print("[INFO] feature matrix: {:.1f}MB".format(data.nbytes / (1024 * 1000.0)))

# encode the labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25, random_state=42)

# train and evaluate a kNN classifier on raw pixel intensities
print("[INFO] evaluate kNN classifier ...")
# fixed: a stray ')' closed the constructor before n_jobs (SyntaxError)
model = KNeighborsClassifier(n_neighbors=args["neighbors"], n_jobs=args["jobs"])
model.fit(trainX, trainY)
# fixed: 'target_names==' (comparison) -> 'target_names=' (keyword argument)
print(classification_report(testY, model.predict(testX), target_names=le.classes_))

sklearn.metrics.classification_report

sklearn中的classification_report函数用于显示主要分类指标的文本报告.在报告中显示每个类的精确度,召回率,F1值等信息。
主要参数:

  • y_true:1维数组,或标签指示器数组/稀疏矩阵,目标值。
  • y_pred:1维数组,或标签指示器数组/稀疏矩阵,分类器返回的估计值。
  • labels:array,shape = [n_labels],报表中包含的标签索引的可选列表。
  • target_names:字符串列表,与标签匹配的可选显示名称(相同顺序)。
  • sample_weight:类似于shape = [n_samples]的数组,可选项,样本权重。
  • digits:int,输出浮点值的位数.
1
2
3
4
5
from sklearn.metrics import classification_report
y_true = [0, 1, 2, 2, 2]
y_pred = [0, 0, 2, 2, 1]
target_names = ['class 0', 'class 1', 'class 2']
print(classification_report(y_true, y_pred, target_names=target_names))

opencv 给图像添加描述

1
2
3
import cv2
# draw the label with the highest score on the image as our # prediction
cv2.putText(orig, "Label: {}".format(labels[np.argmax(scores)]), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

Keras中的Checkpoint机制

1
2
3
4
5
6
7
8
9
10
11
12
13
from keras.callbacks import ModelCheckpoint

# construct the callback to save only the *best* model to disk
# based on the validation loss
fname = os.path.sep.join([args["weights"],
"weights-{epoch:03d}-{val_loss:.4f}.hdf5"])
checkpoint = ModelCheckpoint(fname, monitor="val_loss", mode="min",
save_best_only=True, verbose=1)
callbacks = [checkpoint]

print("[INFO] training network...")
H = model.fit(trainX, trainY, validation_data=(testX, testY),
batch_size=64, epochs=40, callbacks=callbacks, verbose=2)

参数:

  • filename:字符串,保存模型的路径
  • monitor:需要监视的值
  • verbose:信息展示模式,0或1
  • save_best_only:当设置为True时,将只保存在验证集上性能最好的模型
  • mode:‘auto’,‘min’,‘max’之一,在save_best_only=True时决定性能最佳模型的评判准则,例如,当监测值为val_acc时,模式应为max,当检测值为val_loss时,模式应为min。在auto模式下,评价准则由被监测值的名字自动推断。
  • save_weights_only:若设置为True,则只保存模型权重,否则将保存整个模型(包括模型结构,配置信息等)
  • period:CheckPoint之间的间隔的epoch数
  1. 可以monitor loss值也可以是val_acc,train_loss, train_acc;
  2. 更多内容参见:http://keras-cn.readthedocs.io/en/latest/other/callbacks/

EarlyStopping

1
keras.callbacks.EarlyStopping(monitor='val_loss', patience=0, verbose=0, mode='auto')

当监测值不再改善时,该回调函数将中止训练

参数

  • monitor:需要监视的量
  • patience:当early stop被激活(如发现loss相比上一个epoch训练没有下降),则经过patience个epoch后停止训练。
  • verbose:信息展示模式
  • mode:‘auto’,‘min’,‘max’之一,在min模式下,如果检测值停止下降则中止训练。在max模式下,当检测值不再上升则停止训练。

基于keras callback实现训练过程监控

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# import the necessary packages
from keras.callbacks import BaseLogger
import matplotlib.pyplot as plt
import numpy as np
import json
import os

class TrainingMonitor(BaseLogger):
    """Keras callback that records per-epoch metrics, optionally
    serializes the history to JSON, and saves a loss/accuracy plot
    after every epoch."""

    def __init__(self, figPath, jsonPath=None, startAt=0):
        # store the output path for the figure, the path to the JSON
        # serialized file, and the starting epoch
        super(TrainingMonitor, self).__init__()
        self.figPath = figPath
        self.jsonPath = jsonPath
        self.startAt = startAt

    def on_train_begin(self, logs={}):
        # initialize the history dictionary
        self.H = {}

        # if the JSON history path exists, load the training history
        if self.jsonPath is not None:
            if os.path.exists(self.jsonPath):
                self.H = json.loads(open(self.jsonPath).read())

                # check to see if a starting epoch was supplied
                if self.startAt > 0:
                    # trim any history entries that are past the
                    # starting epoch (used when resuming after ctrl+c)
                    for k in self.H.keys():
                        self.H[k] = self.H[k][:self.startAt]

    def on_epoch_end(self, epoch, logs={}):
        # loop over the logs and update the loss, accuracy, etc.
        # for the entire training process
        for (k, v) in logs.items():
            l = self.H.get(k, [])
            l.append(v)
            self.H[k] = l

        # check to see if the training history should be serialized to file
        if self.jsonPath is not None:
            f = open(self.jsonPath, "w")
            f.write(json.dumps(self.H))
            f.close()

        # ensure at least two epochs have passed before plotting
        # (epoch starts at zero)
        if len(self.H["loss"]) > 1:
            # plot the training loss and accuracy
            N = np.arange(0, len(self.H["loss"]))
            plt.style.use("ggplot")
            plt.figure()
            plt.plot(N, self.H["loss"], label="train_loss")
            plt.plot(N, self.H["val_loss"], label="val_loss")
            plt.plot(N, self.H["acc"], label="train_acc")
            plt.plot(N, self.H["val_acc"], label="val_acc")
            plt.title("Training Loss and Accuracy [Epoch {}]".format(len(self.H["loss"])))
            plt.xlabel("Epoch #")
            plt.ylabel("Loss/Accuracy")
            plt.legend()

            # save the figure, then close it to free the figure's memory
            plt.savefig(self.figPath)
            plt.close()
  • figPath: The path to the output plot that we can use to visualize loss and accuracy over time.
  • jsonPath: An optional path used to serialize the loss and accuracy values as a JSON file. This path is useful if you want to use the training history to create custom plots of your own.
  • startAt: This is the starting epoch that training is resumed at when using ctrl + c training.

参考

读书笔记-<腾讯传:中国互联网公司进化论>

2017年1月份,整理了一堆计划在2017年阅读的书籍清单,其中包括一些读过多遍的老书,也有一些刚刚问世的新书。希望通过一轮全新的阅读能够获得更多的感悟。其中《腾讯传:中国互联网公司进化论》在2016年12月刚刚出版,又是关于BAT巨头的全新著作,不免先睹为快。


Git使用总结

一直在用Git作为代码版本控制工具,一般都是现查现用,难得把一本专门介绍git的书从头读到尾。做个笔记,记录一下新Get到的一些技能,以备以后快速查看。


Your browser is out-of-date!

Update your browser to view this website correctly. Update my browser now

×