注意:本文最初使用jupyter notebook编写,后经程序转换为markdown,所以格式可能有多处错误,懒得修改了。
最近学习计算机视觉的时候遇到找两张图片不同的问题,找到解决方案后,在QQ游戏大家来找茬中实验了一下效果.
要找不同首先得把两张图片提取出来嘛,我们先把电脑屏幕截图,然后再提取我们需要的图片.
我对比过pyscreenshot,PIL和win32gui三种截屏库的截屏速度,发现PIL比其他的截屏方式快那么一点.
#from PIL import ImageGrab  # PIL's ImageGrab was the fastest of the screenshot libraries tested
import numpy as np
import time
import matplotlib.pyplot as plt
import cv2
import itertools
%matplotlib inline
# Live-capture path (disabled — a saved screenshot is loaded from disk instead):
#pic=ImageGrab.grab()
#img = np.array(pic)
# plt.imread returns floats in [0, 1] for PNG; scale to the 0-255 uint8
# range that the OpenCV calls below expect.
img = plt.imread('images/2.png')
img = np.uint8(img*255)
plt.imshow(img)
屏幕截图有了,现在我们来提取其中我们需要的两幅图,首先我想到的是用边缘检测提取边界,不过效果不是很理想
# First attempt: extract the picture borders with Canny edge detection.
# (As noted in the text, the result was too noisy to be usable.)
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
edges = cv2.Canny(gray, 300, 500, apertureSize=5)
plt.imshow(edges, cmap='gray')
所以我想先用颜色来区分一下,先来把目标图片和背景图片的颜色分布画出来,方便我们观察
# Plot the H/S/V distributions of a target-picture sample and a background
# ("edge") sample so the channels that separate them can be chosen visually.
target = plt.imread('images/target.png')
target = np.uint8(target * 255)
target = cv2.cvtColor(target, cv2.COLOR_RGB2HSV)
# One 32-bin histogram over 0-255 per HSV channel.
target_hhist, target_shist, target_vhist = (
    np.histogram(target[:, :, c], bins=32, range=(0, 256)) for c in range(3))

edge = plt.imread('images/edge.png')
edge = np.uint8(edge * 255)
edge = cv2.cvtColor(edge, cv2.COLOR_RGB2HSV)
edge_hhist, edge_shist, edge_vhist = (
    np.histogram(edge[:, :, c], bins=32, range=(0, 256)) for c in range(3))

# Bar positions: the center of each histogram bin.
bin_edges = target_hhist[1]
bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2

fig = plt.figure(figsize=(12, 8))
panels = [
    (target_hhist, 'target H Histogram'),
    (target_shist, 'target S Histogram'),
    (target_vhist, 'target V Histogram'),
    (edge_hhist, 'edge H Histogram'),
    (edge_shist, 'edge S Histogram'),
    (edge_vhist, 'edge V Histogram'),
]
for pos, (hist, title) in enumerate(panels, start=1):
    plt.subplot(2, 3, pos)
    plt.bar(bin_centers, hist[0])
    # All panels share the full 0-255 x-range. The original used
    # xlim(0, 233) for two of the edge plots — an inconsistency/typo.
    plt.xlim(0, 256)
    plt.title(title)
从上图中我们发现背景的色调和饱和度比较集中,所以我们可以通过这两个颜色通道来过滤掉背景
# Crop away the top UI bar and the bottom strip, and keep only the first
# three channels in case the capture carries an alpha channel.
img = img[300:-200, :, :3]
hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
# The background's hue and saturation are tightly clustered (see the
# histograms above), so a narrow HSV window isolates background pixels.
lower = np.array([95, 150, 100])
upper = np.array([105, 190, 230])
mask = cv2.inRange(hsv, lower, upper)
f = plt.figure(figsize=(10, 5))
plt.imshow(mask, cmap='gray')
统计一下x轴方向和y轴方向符合条件的像素点的个数,并且把数量统计画成折线图.
通过折线图发现在图片的边缘处折线图会出现明显的断崖,我们可以根据这点来提取出图片
# Sum the background mask along each axis; picture borders appear as sharp
# cliffs in these 1-D profiles.
yhist = mask.sum(axis=1)
xhist = mask.sum(axis=0)
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
ax1.plot(np.arange(len(yhist)), yhist)
ax2.plot(np.arange(len(xhist)), xhist)
接下来通过阈值简化图形
# Binarize the profiles: above the threshold -> 1, otherwise 0, so the
# cliffs become clean step edges.
a = (yhist > 200000).astype(float)
b = (xhist > 100000).astype(float)
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
ax1.plot(np.arange(len(a)), a)
ax2.plot(np.arange(len(b)), b)
下面循环一下找出断崖处的坐标
def get_xs(indexs):
    """Return the boundary coordinates of the consecutive runs in ``indexs``.

    ``indexs`` is the tuple returned by ``np.where`` on a 1-D 0/1 array; only
    ``indexs[0]`` (the sorted positions of the 1-valued entries) is read.
    Whenever a gap appears between successive indices, the end of the previous
    run and the start of the new one are recorded; the last index is always
    appended so the final run is closed.

    NOTE(review): ``last == 0`` doubles as the "no previous run" sentinel, so a
    run ending exactly at index 0 is not recorded as a run end — acceptable
    here since picture borders never sit at coordinate 0.
    """
    xs = []
    last = 0
    for i in indexs[0]:
        if i != last + 1:
            # Bug fix: the original used `last is not 0`, an identity test
            # against an int literal. It is a SyntaxWarning on Python 3.8+,
            # and it wrongly evaluates True for numpy.int64(0), duplicating
            # a boundary when a run starts at index 0.
            if last != 0:
                xs.append(last)
            xs.append(i)
        last = i
    xs.append(indexs[0][-1])
    return xs
# Extract the cliff coordinates along each axis from the binarized profiles.
ys = get_xs(np.where(a == 1))
print(ys)
xs = get_xs(np.where(b == 1))
print(xs)
[98, 120, 486, 515]
[409, 431, 917, 1002, 1488, 1500, 1504, 1504]
至此,我们已经找出了颜色变化明显处的x坐标和y坐标,接下来我们通过排列组合的方式,找出这些坐标所绘制出的直线可能组合成的矩形.
不过,还是有很多矩形不是我们想要的,所以还需要用矩形的长宽作为阈值来进一步筛选
import itertools  # already imported at the top; kept so this cell runs standalone

# Pair up the boundary coordinates and keep only pairs whose spacing could
# plausibly span one of the two pictures.
limit_y = (100, 370)
limit_x = (100, 487)
usable_y = [pair for pair in itertools.combinations(ys, 2)
            if limit_y[0] < pair[1] - pair[0] < limit_y[1]]
usable_x = [pair for pair in itertools.combinations(xs, 2)
            if limit_x[0] < pair[1] - pair[0] < limit_x[1]]
draw_img = np.copy(img)
matrix = []
pictures = []
# Every (x-pair, y-pair) combination describes a candidate rectangle:
# record its corners, crop it, and draw it on the debug image.
for x0, x1 in usable_x:
    for y0, y1 in usable_y:
        matrix.append((x0, y0, x1, y1))
        pictures.append(img[y0:y1, x0:x1])
        cv2.rectangle(draw_img, (x0, y0), (x1, y1), (255, 0, 0), 5)
print(len(matrix))
# Stop the notebook if we did not find exactly the two pictures.
assert len(pictures) == 2, "如果找到的图片不是两张,停止程序"
f = plt.figure(figsize=(20, 5))
plt.imshow(draw_img)
f = plt.figure(figsize=(20, 5))
plt.subplot(1, 2, 1), plt.imshow(pictures[0]), plt.xticks([]), plt.yticks([])
plt.subplot(1, 2, 2), plt.imshow(pictures[1]), plt.xticks([]), plt.yticks([])
plt.show()
OK,需要的图片提取出来之后,我们可以开始找两张图片的不同啦.
我们将两幅图的色值相减并取绝对值,这时打印一下结果会明显的发现不同的地方
# Subtract the two crops pixel-wise; any location that differs lights up in
# the absolute difference image.
imgA = pictures[0]
imgB = pictures[1]
result = cv2.absdiff(imgA, imgB)
f = plt.figure(figsize=(10, 5))
plt.imshow(result)
plt.show()

# Binarize the diff, trim a 10-px border (alignment noise at the edges),
# and find the difference blobs as contours.
_, result_window_bin = cv2.threshold(result, 80, 255, cv2.THRESH_BINARY)
edge = 10
result_window_bin = result_window_bin[edge:-edge, edge:-edge]
gray = cv2.cvtColor(result_window_bin, cv2.COLOR_BGR2GRAY)
# Bug fix: findContours returns (image, contours, hierarchy) in OpenCV 3
# but only (contours, hierarchy) in OpenCV 4, so the original 3-value
# unpacking breaks on OpenCV 4. Indexing [-2] selects the contours in both.
contours = cv2.findContours(gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
imgC = imgA.copy()
for contour in contours:
    # Bounding-box corners of the contour (renamed from min/max, which
    # shadowed the builtins), shifted back by the trimmed border width.
    lo = np.nanmin(contour, 0)
    hi = np.nanmax(contour, 0)
    loc = (int((lo[0][0] + hi[0][0]) / 2) + edge,
           int((lo[0][1] + hi[0][1]) / 2) + edge)
    # Circle each detected difference on a copy of the first picture.
    cv2.circle(imgC, loc, 10, (0, 0, 255), 10)
f = plt.figure(figsize=(20, 5))
plt.subplot(1, 3, 1), plt.imshow(imgA), plt.xticks([]), plt.yticks([])
plt.subplot(1, 3, 2), plt.imshow(imgB), plt.xticks([]), plt.yticks([])
plt.subplot(1, 3, 3), plt.imshow(imgC), plt.xticks([]), plt.yticks([])
plt.show()
python中win32api库可以调用windows的API模拟鼠标点击操作,我们可以将所有不同之处模拟鼠标进行点击
import win32api, win32con

def click(x, y):
    """Move the cursor to screen coordinates (x, y) and simulate a left click."""
    win32api.SetCursorPos((x, y))
    win32api.mouse_event(win32con.MOUSEEVENTF_LEFTDOWN, x, y, 0, 0)
    win32api.mouse_event(win32con.MOUSEEVENTF_LEFTUP, x, y, 0, 0)

click(10, 10)