reference: https://kezunlin.me/post/62811272/
Guide
Matplot (skimage/ PIL Image)
import matplotlib.pyplot as plt
import matplotlib.image as img

# matplotlib decodes the JPEG into a (height, width, channels) array in RGB order.
image = img.imread("images/cat.jpg")
# print() calls (instead of Python 2 print statements) run on Python 2 and 3.
print(image.shape)
print(image[:5, :5, 0])  # top-left 5x5 patch of channel 0 (red)
plt.imshow(image)
plt.show()
(360, 480, 3)
[[26 27 25 28 30]
[26 27 25 26 28]
[26 26 26 26 27]
[27 26 27 28 29]
[29 27 26 26 29]]
png
PIL.Image
import matplotlib.pyplot as plt
from PIL import Image
# Open the file with Pillow; the result is a PIL Image object, not an array.
image = Image.open("images/cat.jpg")
print(image)
# The PIL image is handed to imshow directly.
plt.imshow(image)
plt.show()
png
skimage
# Import the submodule explicitly: a bare `import skimage` does not guarantee
# that `skimage.io` is loaded on every scikit-image version.
import skimage.io

# Same sample image the other loaders in this guide read.
image_filepath = "images/cat.jpg"
image = skimage.io.imread(image_filepath)
OpenCV
import cv2

# cv2.imread returns a (height, width, channels) array in BGR channel order.
image = cv2.imread("images/cat.jpg")
# print() calls (instead of Python 2 print statements) run on Python 2 and 3.
print(image.shape)
print(image[:5, :5, 0])  # channel 0 is blue here, not red
# `plt` comes from the matplotlib import in the earlier cells. Showing the BGR
# array as-is is what produces the wrong colors discussed next.
plt.imshow(image)
plt.show()
(360, 480, 3)
[[49 50 47 48 50]
[51 52 48 48 50]
[51 51 49 48 49]
[50 49 49 48 49]
[52 50 49 48 49]]
png
The colors of our image are clearly wrong! Why is this?
The answer lies in a caveat of OpenCV: it represents RGB images as multi-dimensional NumPy arrays, but in reverse channel order. This means that OpenCV images are actually stored in BGR order rather than RGB!
import cv2
image = cv2.imread("images/cat.jpg")
# Reorder channels BGR -> RGB so matplotlib renders the colors correctly.
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Hide the axes; `plt` comes from the matplotlib import in the earlier cells.
plt.axis("off")
plt.imshow(rgb_image)
plt.show()
png
Matplot VS. OpenCV
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as img
import cv2

# Load the same file twice: once with matplotlib (RGB) and once with OpenCV
# (BGR, converted to RGB) so the two arrays can be compared element-wise.
image1 = img.imread("images/cat.jpg")
image = cv2.imread("images/cat.jpg")
image2 = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# print() calls (instead of Python 2 print statements) run on Python 2 and 3.
print(image1.dtype)
print(image1[:5, :5, 0])
print()
print(image2.dtype)
print(image2[:5, :5, 0])

# Count matching elements and compare against the total element count
# (image1.size) rather than a hard-coded 360*480*3.
equal_count = np.sum(np.equal(image1, image2))
print(equal_count)
print(equal_count == image1.size)
uint8
[[26 27 25 28 30]
[26 27 25 26 28]
[26 26 26 26 27]
[27 26 27 28 29]
[29 27 26 26 29]]
uint8
[[26 27 25 28 30]
[26 27 25 26 28]
[26 26 26 26 27]
[27 26 27 28 29]
[29 27 26 26 29]]
518400
True
caffe.io.load_image
caffe.io.load_image
loads data in a normalized form (0-1)
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['image.interpolation'] = 'nearest'
import sys
caffe_root = '../'
sys.path.insert(0, caffe_root + 'python')
import caffe
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
print image.shape,image.dtype
print image[:5,:5,0]
plt.figure()
plt.imshow(image)
image2 = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg',color=False)
print image2.shape
gray_image2 = image2.squeeze()
print gray_image2.shape,gray_image2.dtype
print gray_image2[:5,:5]
plt.figure()
plt.imshow(gray_image2)
image3 = caffe.io.load_image(caffe_root + 'examples/images/cat_gray.jpg',color=False)
print image3.shape
gray_image3 = image3.squeeze()
print gray_image3.shape,gray_image3.dtype
print gray_image3[:5,:5]
plt.figure()
plt.imshow(gray_image3)
plt.show()
(360, 480, 3) float32
[[ 0.10196079 0.10588235 0.09803922 0.10980392 0.11764706]
[ 0.10196079 0.10588235 0.09803922 0.10196079 0.10980392]
[ 0.10196079 0.10196079 0.10196079 0.10196079 0.10588235]
[ 0.10588235 0.10196079 0.10588235 0.10980392 0.11372549]
[ 0.11372549 0.10588235 0.10196079 0.10196079 0.11372549]]
(360, 480, 1)
(360, 480) float32
[[ 0.19543412 0.19935569 0.18842432 0.19120707 0.1990502 ]
[ 0.19599961 0.19992118 0.19151255 0.19234589 0.20018902]
[ 0.19599961 0.19599961 0.19543412 0.19234589 0.19626746]
[ 0.19935569 0.19543412 0.19626746 0.19120707 0.19512863]
[ 0.20719883 0.19935569 0.19543412 0.19234589 0.19512863]]
(360, 480, 1)
(360, 480) float32
[[ 0.10196079 0.10588235 0.09803922 0.10980392 0.11372549]
[ 0.10196079 0.10588235 0.09803922 0.10196079 0.10980392]
[ 0.10196079 0.10588235 0.10196079 0.10196079 0.10588235]
[ 0.10588235 0.10196079 0.10588235 0.10980392 0.11372549]
[ 0.11764706 0.10196079 0.10196079 0.10588235 0.10980392]]
png
png
png
caffe.io.Transformer
caffe.io.Transformer
for Network input blob(m,c,h,w):
- Caffe networks use the BGR channel order by default, the same as OpenCV.
- Caffe mean files use BGR ordering; the mean is calculated from the training images, not the test images.
mu = np.array([104, 117, 123])  # BGR
- pixel range in [0,255] with dtype float32.
- (m,c,h,w), BGR order,[0,255] range,float32
caffe.io.load_image
caffe.io.Transformer
:
- input image: caffe.io.load_image: (h,w,c),RGB,[0,1],float32
- transformed image: (c,h,w), BGR,[0,255] float32
caffe.io.Transformer
steps:
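Note that the mean subtraction is carried out after the raw scale (set_raw_scale) and before the input scale (set_input_scale); "scaling" in the Caffe documentation refers to the input scale.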
- transformer.set_transpose('data', (2,0,1)) # (h,w,c)->(c,h,w)
- transformer.set_channel_swap('data', (2,1,0)) # RGB->BGR
- transformer.set_raw_scale('data', 255) # [0,1]->[0,255] float32
- transformer.set_mean('data', mu) # subtract BGR mean
keep in mind that the Transformer is only required when using a deploy.prototxt-like network definition, so without the Data Layer. When using a Data Layer, things get easier to understand.
import numpy as np
import matplotlib.pyplot as plt
import sys
caffe_root = '../'
sys.path.insert(0, caffe_root + 'python')  # make pycaffe under caffe_root importable
import caffe

# caffe.io.load_image output: (h, w, c), RGB, [0, 1], float32.
# print() calls (instead of Python 2 print statements) run on Python 2 and 3.
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
print(image.shape, image.dtype)
print(image[:5, :5, 0])

# Collapse the two trailing axes of the mean file, leaving one mean value per
# channel (BGR order, as noted for Caffe mean files).
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)

# Blob shape (m, c, h, w); the recorded output shows the image resized to 227x227.
data_shape = (10, 3, 227, 227)
transformer = caffe.io.Transformer({'data': data_shape})
transformer.set_transpose('data', (2, 0, 1))     # (h,w,c) -> (c,h,w)
transformer.set_channel_swap('data', (2, 1, 0))  # RGB -> BGR
transformer.set_raw_scale('data', 255)           # [0,1] -> [0,255]
transformer.set_mean('data', mu)                 # subtract the BGR mean
transformed_image = transformer.preprocess('data', image)

print()
print('original image: ', image.shape, image.dtype)
print('transform image: ', transformed_image.shape, transformed_image.dtype)
print(transformed_image[0, :5, :5])
(360, 480, 3) float32
[[ 0.10196079 0.10588235 0.09803922 0.10980392 0.11764706]
[ 0.10196079 0.10588235 0.09803922 0.10196079 0.10980392]
[ 0.10196079 0.10196079 0.10196079 0.10196079 0.10588235]
[ 0.10588235 0.10196079 0.10588235 0.10980392 0.11372549]
[ 0.11372549 0.10588235 0.10196079 0.10196079 0.11372549]]
original image: (360, 480, 3) float32
transform image: (3, 227, 227) float32
[[-53.86381531 -56.23903656 -53.54626465 -53.14715195 -51.32625961]
[-52.93947601 -55.71855164 -54.00423813 -54.76469803 -52.88771057]
[-53.89373398 -55.67879486 -55.4278717 -55.22265625 -53.47174454]
[-50.98455811 -51.3506012 -54.06866074 -52.09104156 -52.94168854]
[-49.92769241 -49.85874176 -52.08575439 -52.50840759 -51.3900528 ]]
cv2.imread
caffe.io.Transformer
:
- input image: cv2.imread: (h,w,c),BGR,[0,255],uint8 (converted to float32 by preprocess)
- transformed image: (c,h,w), BGR order,[0,255] float32
caffe.io.Transformer
steps:
Note that the mean subtraction is carried out after the raw scale (set_raw_scale) and before the input scale (set_input_scale); neither scale is set in this case.
- transformer.set_transpose('data', (2,0,1)) # (h,w,c)->(c,h,w)
- transformer.set_mean('data', mu) # subtract BGR mean
import cv2

# cv2.imread already yields BGR values in [0, 255], so neither a channel swap
# nor a raw scale is configured here; only the transpose and the mean remain.
image = cv2.imread("test/cat.jpg")
data_shape = (10, 3, 227, 227)
transformer = caffe.io.Transformer({'data': data_shape})
transformer.set_transpose('data', (2, 0, 1))  # (h,w,c) -> (c,h,w)
transformer.set_mean('data', mu)              # `mu` is the BGR mean from the previous cell
transformed_image = transformer.preprocess('data', image)

# print() calls (instead of Python 2 print statements) run on Python 2 and 3.
print()
print('original image: ', image.shape, image.dtype)
print('transform image: ', transformed_image.shape, transformed_image.dtype)
print(transformed_image[0, :5, :5])
deprocess transformed_image
def deprocess_net_image(image):
    """Turn a preprocessed network input back into a displayable image.

    Reverses the channel axis, moves it last, adds the per-channel offsets
    [123, 117, 104] back, clamps to [0, 255] and rounds to uint8.

    Args:
        image: array indexed as (channel, height, width) with 3 channels.
            The caller's array is left unmodified.

    Returns:
        A uint8 array indexed as (height, width, channel).
    """
    image = image.copy()              # work on a copy, never the caller's array
    image = image[::-1]               # reverse the channel axis (BGR -> RGB)
    image = image.transpose(1, 2, 0)  # (c, h, w) -> (h, w, c)
    image += [123, 117, 104]          # approximately undo the mean subtraction

    # Clamp to the displayable range before casting.
    np.clip(image, 0, 255, out=image)

    # Round, then cast to uint8.
    image = np.round(image)
    return np.require(image, dtype=np.uint8)
# Convert the transformed blob back into a displayable image and plot it.
image = deprocess_net_image(transformed_image)
# print() calls (instead of Python 2 print statements) run on Python 2 and 3.
print(image.shape, image.dtype)
print(image[:5, :5, 0])
plt.imshow(image)
plt.show()
(227, 227, 3) uint8
[[27 27 29 29 30]
[26 26 28 27 28]
[27 27 27 26 28]
[27 28 25 28 27]
[26 29 28 28 28]]
png
set 3-dim image to 4-dim input blob data
import numpy as np

# A 4-dim blob holding two 3x4x4 "images", initially all zeros.
data = np.zeros((2, 3, 4, 4))
# print() calls (instead of Python 2 print statements) run on Python 2 and 3.
print(data)

# One 3-dim image with distinct values so the copy is easy to see.
image = np.arange(48).reshape(3, 4, 4)
print()
print(image)

print('set image to data')
# Assigning through `...` broadcasts the image over the leading batch axis,
# so every entry of the blob receives a copy of it.
data[...] = image
print(data)
[[[[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]
[[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]
[[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]]
[[[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]
[[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]
[[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]]]
[[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]
[12 13 14 15]]
[[16 17 18 19]
[20 21 22 23]
[24 25 26 27]
[28 29 30 31]]
[[32 33 34 35]
[36 37 38 39]
[40 41 42 43]
[44 45 46 47]]]
set image to data
[[[[ 0. 1. 2. 3.]
[ 4. 5. 6. 7.]
[ 8. 9. 10. 11.]
[ 12. 13. 14. 15.]]
[[ 16. 17. 18. 19.]
[ 20. 21. 22. 23.]
[ 24. 25. 26. 27.]
[ 28. 29. 30. 31.]]
[[ 32. 33. 34. 35.]
[ 36. 37. 38. 39.]
[ 40. 41. 42. 43.]
[ 44. 45. 46. 47.]]]
[[[ 0. 1. 2. 3.]
[ 4. 5. 6. 7.]
[ 8. 9. 10. 11.]
[ 12. 13. 14. 15.]]
[[ 16. 17. 18. 19.]
[ 20. 21. 22. 23.]
[ 24. 25. 26. 27.]
[ 28. 29. 30. 31.]]
[[ 32. 33. 34. 35.]
[ 36. 37. 38. 39.]
[ 40. 41. 42. 43.]
[ 44. 45. 46. 47.]]]]
transformer vs. python code
caffe.io.load_image
import os
import sys
import cv2
import numpy as np
# Make sure that caffe is on the python path:
caffe_root = './'
os.chdir(caffe_root)
sys.path.insert(0, os.path.join(caffe_root, 'python'))
import caffe
# caffe.io.load_image: transformer + python code
# Blob shape (m, c, h, w); preprocess resizes the image to the last two dims.
data_shape = [1,3,512,512]
transformer = caffe.io.Transformer({'data':data_shape}) # resize
transformer.set_transpose('data', (2, 0, 1)) # hwc ===> chw
transformer.set_channel_swap('data', (2, 1, 0)) # rgb===>bgr
transformer.set_raw_scale('data', 255) # [0-1]===> [0,255]
transformer.set_mean('data', np.array([104, 117, 123])) # bgr mean pixel
image_file = "./images/1.png"
print("image_file=", image_file)
image = caffe.io.load_image(image_file) # hwc, rgb, 0-1
print("image.shape=", image.shape)
transformed_image = transformer.preprocess('data', image) # chw, bgr, mean-subtracted
print("transformed_image.shape=", transformed_image.shape) # 3,512,512
# Unpacking along the first axis gives one 2-dim plane per channel (bgr order).
b,g,r = transformed_image
print(b.shape) # 512,512
print(g.shape)
print(r.shape)
print("")
# Top-left 5x5 patch of every channel, for comparison with the manual version.
print(transformed_image[:,:5,:5])
output
('image_file=', './images/1.png')
('image.shape=', (1080, 1920, 3))
('transformed_image.shape=', (3, 512, 512))
(512, 512)
(512, 512)
(512, 512)
[[[ -98. -98. -98. -98. -98. ]
[ -98. -98. -98. -98. -98. ]
[ -23.96776581 -28.58105469 -31.359375 -25.08592987 -28.90721893]
[ -8.21874237 -12.71092987 -15.46875 -15.27832031 -10.57226562]
[ -7.75 -12.12499237 -15. -15. -10.984375 ]]
[[-117. -117. -117. -117. -117. ]
[-117. -117. -117. -117. -117. ]
[ -43.96776581 -48.58105469 -51.359375 -45.08592987 -48.90721893]
[ -26.21874237 -30.71092987 -33.46875 -33.27832031 -33.57226562]
[ -24.75 -29.12499237 -32. -32. -31.984375 ]]
[[-123. -123. -123. -123. -123. ]
[-123. -123. -123. -123. -123. ]
[ -52.96776581 -57.58105469 -60.359375 -54.08592987 -57.90721893]
[ -40.21874237 -44.71092987 -47.46875 -47.27832031 -44.572258 ]
[ -40.75 -45.12499237 -48. -48. -47.984375 ]]]
python code
# Manual re-implementation of the Transformer steps on the caffe.io.load_image
# result (`image` comes from the previous cell).
print(image.shape) # hwc,rgb,0-1 (1080, 1920, 3)
print(image.dtype) # float32
# resize
image = cv2.resize(image, (512,512))
print("image resize = ",image.shape) # (512, 512, 3)
# hwc,rgb ===> chw, bgr
r,g,b = image[:,:,0],image[:,:,1],image[:,:,2]
print(b.shape) # (512, 512)
print(g.shape) # (512, 512)
print(r.shape) # (512, 512)
# np.zeros defaults to float64, so the values printed below differ slightly
# from the Transformer's float32 result.
bgr = np.zeros([3,b.shape[0],b.shape[1]])
print(bgr.shape)
bgr[0,:,:] = b
bgr[1,:,:] = g
bgr[2,:,:] = r
# 0-1 ===>0-255
bgr = bgr *255.
# -mean
print("")
bgr[0] -= 104
bgr[1] -= 117
bgr[2] -= 123
print(bgr[:,:5,:5])
output
(1080, 1920, 3)
float32
('image resize = ', (512, 512, 3))
float32
(512, 512)
(512, 512)
(512, 512)
(3, 512, 512)
[[[ -97.99999988 -97.99999988 -97.99999988 -97.99999988 -97.99999988]
[ -97.99999988 -97.99999988 -97.99999988 -97.99999988 -97.99999988]
[ -23.9677673 -28.58105415 -31.35937387 -25.0859333 -28.90722105]
[ -8.21874478 -12.71093214 -15.46874815 -15.27831757 -10.5722701 ]
[ -7.74999434 -12.12499598 -14.99999771 -14.99999771 -10.98437318]]
[[-117. -117. -117. -117. -117. ]
[-117. -117. -117. -117. -117. ]
[ -43.96776688 -48.58105373 -51.35937345 -45.08593288 -48.90722823]
[ -26.21874449 -30.71093184 -33.46874785 -33.27831727 -33.5722695 ]
[ -24.7499941 -29.12499574 -31.99999747 -31.99999747 -31.98437271]]
[[-123. -123. -123. -123. -123. ]
[-123. -123. -123. -123. -123. ]
[ -52.9677667 -57.58105356 -60.35937327 -54.0859327 -57.90722805]
[ -40.21874401 -44.71093136 -47.46874738 -47.2783168 -44.5722692 ]
[ -40.7499935 -45.12499514 -47.99999687 -47.99999687 -47.98437211]]]
cv2.imread
# cv2.imread input is already bgr in [0,255], so only the transpose and the
# mean are configured (no channel swap, no raw scale).
data_shape = [1,3,512,512]
transformer = caffe.io.Transformer({'data':data_shape})
transformer.set_transpose('data', (2, 0, 1))
transformer.set_mean('data', np.array([104, 117, 123]))
image_file = "./images/1.png"
print("image_file=", image_file)
image = cv2.imread(image_file)
print("image.shape=", image.shape)
transformed_image = transformer.preprocess('data', image)
print("transformed_image.shape=", transformed_image.shape)
# Unpacking along the first axis gives one 2-dim plane per channel (bgr order).
b,g,r = transformed_image
print(b.shape)
print(g.shape)
print(r.shape)
print("")
# Top-left 5x5 patch of every channel, for comparison with the manual version.
print(transformed_image[:,:5,:5])
output
('image_file=', './images/1.png')
('image.shape=', (1080, 1920, 3))
('transformed_image.shape=', (3, 512, 512))
(512, 512)
(512, 512)
(512, 512)
[[[ -98. -98. -98. -98. -98. ]
[ -98. -98. -98. -98. -98. ]
[ -23.96777344 -28.58105469 -31.359375 -25.0859375 -28.90722656]
[ -8.21875 -12.7109375 -15.46875 -15.27832031 -10.57226562]
[ -7.75 -12.125 -15. -15. -10.984375 ]]
[[-117. -117. -117. -117. -117. ]
[-117. -117. -117. -117. -117. ]
[ -43.96777344 -48.58105469 -51.359375 -45.0859375 -48.90722656]
[ -26.21875 -30.7109375 -33.46875 -33.27832031 -33.57226562]
[ -24.75 -29.125 -32. -32. -31.984375 ]]
[[-123. -123. -123. -123. -123. ]
[-123. -123. -123. -123. -123. ]
[ -52.96777344 -57.58105469 -60.35937119 -54.0859375 -57.90722656]
[ -40.21875 -44.7109375 -47.46875 -47.27832031 -44.57226562]
[ -40.75 -45.125 -48. -48. -47.984375 ]]]
python code
# Manual re-implementation of the Transformer steps on the cv2.imread result
# (`image` comes from the previous cell).
print(image.shape) # hwc,bgr,0-255 (1080, 1920, 3)
print(image.dtype) # uint8
# uint8 ===>float32
image = image.astype('float32') # key steps
print(image.dtype) # float32
# resize
image = cv2.resize(image, (512,512))
print("image resize = ",image.shape) # (512, 512, 3)
print(image.dtype) # float32
# hwc ===> chw
b,g,r = image[:,:,0],image[:,:,1],image[:,:,2]
print(b.shape) # (512, 512)
print(g.shape) # (512, 512)
print(r.shape) # (512, 512)
bgr = np.zeros([3,b.shape[0],b.shape[1]])
print(bgr.shape)
# -mean
# b, g and r are slices of `image`, so these in-place subtractions also
# modify `image` itself.
b -= 104
g -= 117
r -= 123
bgr[0,:,:] = b
bgr[1,:,:] = g
bgr[2,:,:] = r
print(bgr[:,:5,:5])
python code v2
# Compact version of the manual cv2 preprocessing.
# NOTE(review): `filepath` is not defined in this snippet; presumably the same
# image file used in the cells above. Confirm before running.
image = cv2.imread(filepath) # hwc, bgr,0-255
print(image.dtype) # uint8
image = image.astype('float32') # key steps
image = cv2.resize(image, (512,512))
print("image resize = ",image.shape) # (512, 512, 3)
print(image.dtype) # float32
# Broadcasts over the last (channel) axis, so each channel gets its own mean.
image -= np.array((104.00698793,116.66876762,122.67891434)) # bgr mean
image = image.transpose((2,0,1)) # hwc ===>chw
print(image[:,:5,:5])
output
(1080, 1920, 3)
uint8
float32
('image resize = ', (512, 512, 3))
float32
(512, 512)
(512, 512)
(512, 512)
(3, 512, 512)
[[[ -98. -98. -98. -98. -98. ]
[ -98. -98. -98. -98. -98. ]
[ -23.96777344 -28.58105469 -31.359375 -25.0859375 -28.90722656]
[ -8.21875 -12.7109375 -15.46875 -15.27832031 -10.57226562]
[ -7.75 -12.125 -15. -15. -10.984375 ]]
[[-117. -117. -117. -117. -117. ]
[-117. -117. -117. -117. -117. ]
[ -43.96777344 -48.58105469 -51.359375 -45.0859375 -48.90722656]
[ -26.21875 -30.7109375 -33.46875 -33.27832031 -33.57226562]
[ -24.75 -29.125 -32. -32. -31.984375 ]]
[[-123. -123. -123. -123. -123. ]
[-123. -123. -123. -123. -123. ]
[ -52.96777344 -57.58105469 -60.359375 -54.0859375 -57.90722656]
[ -40.21875 -44.7109375 -47.46875 -47.27832031 -44.57226562]
[ -40.75 -45.125 -48. -48. -47.984375 ]]]
Conclusions
- Matplot.imread: dims: (height,width,channels),order: RGB,range: [0,255] dtype: uint8, plot
- OpenCV.imread: dims: (height,width,channels),order: BGR,range: [0,255] dtype: uint8, plot
- caffe.io.load_image: dims: (height,width,channels),order: RGB,range: [0,1] dtype: float32 (caffe_io_image = matplot_image/255.0) ,plot
- caffe Network Input(Transformer): dims: (m,c,h,w), order: BGR, range [0,255],dtype: float32, cannot be plotted directly (plotting it as-is gives a wrong image; deprocess it first)
Reference
History