基于 Python & OpenCV 的简易答题卡识别

又有一个多月的时间了呢 = =
刚想起来还欠着一篇文章没写,趁着没忘干净赶紧补上

先上样卡(A4,扫描图片为600dpi)
截图 2015-07-21 02.10.02

整体并不是很复杂,但一口气手工切40+张也是够累,所以想办法自己写了个识别程序
opencv是个功能丰富的开源库,这里主要用到查找边界的功能。

opencv的原生api支持c、java、python,python 比较易于调试,所以选择了它。这里使用 python 2.7。
对于有编程经验的人来讲,python 比较容易入门,基本上摸索十分钟左右就可以了(靠谱的IDE也是重要条件嗯)

首先下载 opencv 和 numpy,numpy 装好可以直接用,opencv 要到 build/python 下找到相应的文件复制到项目目录
P.S. 如果运行出错的话,要注意 python、numpy、opencv 的平台版本(32/64 位)需保持一致

因为图片整体结构比较简单,而且定位点都是矩形,所以用了比较简单的方法,把边界都抽象为矩形处理。
大概流程是这样(findPoint.py 代码后附):

引入依赖


import findPoint
import cv2

图片修正(根据四角的定位点修正图片倾斜)


# Load the scan, detect the candidate marker rectangles, then deskew the
# image using the corner markers.
# NOTE(review): `dir` and `source` are not defined in this snippet; presumably
# the directory prefix and file name of the scanned sheet.
raw = findPoint.read_file(dir + source)
img, rects = findPoint.process_img(raw)
fixed = findPoint.fix_image(img, rects)

查找四角定位点


# Detect the rectangles again, this time on the deskewed image.
img, rects = findPoint.process_img(fixed)

# Only the first two dimensions (rows, columns) are needed; the channel count
# was unpacked into an unused variable before.
height, width = img.shape[:2]
# points = [top-left, top-right, bottom-left, bottom-right] marker rectangles.
points = findPoint.find_corner_points(width, height, rects)

移除无关的矩形


# Discard every rectangle that has a corner strictly inside the area spanned
# between the top-left and bottom-right corner markers (with a 10 px margin),
# i.e. everything that is not part of the marker border.
top_left, bottom_right = points[0], points[3]
rects = findPoint.remove_rects(
    rects,
    top_left.x_max + 10,
    bottom_right.x_min - 10,
    top_left.y_max + 10,
    bottom_right.y_min - 10,
)

分别取出 x、y 轴定位点并排序(分别对应上、左两部分)


# x-axis markers: drop everything lying more than 10 px below the top-left
# marker, which leaves the top row.
x_points = findPoint.remove_rects(rects, 0, width, points[0].y_max + 10, height)
# y-axis markers: drop everything lying more than 10 px to the right of the
# top-left marker, which leaves the left column.
y_points = findPoint.remove_rects(rects, points[0].x_max + 10, width, 0, height)

# Sort left-to-right / top-to-bottom.  `key=` is used instead of a positional
# comparison function because the latter only works on Python 2.
x_points.sort(key=lambda rect: rect.x_min)
y_points.sort(key=lambda rect: rect.y_min)

然后就可以根据定位点切分图片了


# match holds the predefined cropping rules.
#
# The first element is a label that the cropping loop skips (it starts at
# index 1) -- presumably the name of the sheet template.
# Every following element is one rule:
#     [output name, [x_min spec, x_max spec], [y_min spec, y_max spec]]
# A spec is either an index into the sorted x_points / y_points list, or
# [spec, offset] where the pixel offset is added to the resolved coordinate.
# The "min" specs read the marker's x_min / y_min edge, the "max" specs read
# its x_max / y_max edge (see parse_point).
match = ['student_15',
        ['1', [2, 4], [0, [0, 380]]],
        ['2', [0, 4], [[1, -200], 1]],
        ['3', [1, 2], [2, 2]],
        ['4', [1, 3], [3, 3]],
        ['5', [0, 2], [[4, -400], 4]]
]

def parse_point(direction, type, point, points):
    """Resolve one coordinate spec of a cropping rule to a pixel value.

    direction -- 'x' selects the x edges; any other value selects the y edges.
    type      -- 'min' selects the *_min edge; any other value selects *_max.
    point     -- an index into `points`, or a list [spec, offset] whose first
                 item is resolved recursively and whose second item is added
                 to the result.
    points    -- sequence of objects exposing x_min / x_max / y_min / y_max.
    """
    if isinstance(point, list):
        return parse_point(direction, type, point[0], points) + point[1]

    axis = 'x' if direction == 'x' else 'y'
    edge = 'min' if type == 'min' else 'max'
    return getattr(points[point], axis + '_' + edge)

# Crop one region per rule (the first element of match is a label, not a
# rule) and write it to "<rule name>.png".
for rule in match[1:]:
    x_spec = rule[1]
    y_spec = rule[2]
    x_min = parse_point('x', 'min', x_spec[0], x_points)
    x_max = parse_point('x', 'max', x_spec[1], x_points)
    y_min = parse_point('y', 'min', y_spec[0], y_points)
    y_max = parse_point('y', 'max', y_spec[1], y_points)

    # Image arrays are indexed [row (y), column (x), channel].
    newimg = img[y_min:y_max, x_min:x_max, :]
    cv2.imwrite(rule[0] + '.png', newimg)

findPoint.py:


import numpy as np
import cv2
import math

def square(num):
    """Return num multiplied by itself."""
    product = num * num
    return product

class Rect:
    """Axis-aligned rectangle described by its min/max x and y coordinates."""

    def __init__(self, x_min, x_max, y_min, y_max):
        self.x_min = x_min
        self.x_max = x_max
        self.y_min = y_min
        self.y_max = y_max

    def __repr__(self):
        # Plain %s formatting instead of np.str: that alias of the builtin
        # str was removed from NumPy, which made printing a Rect fail.
        return "x: %s ~ %s y: %s ~ %s\n" % (
            self.x_min, self.x_max, self.y_min, self.y_max)

    # Both representations are intentionally identical.
    __str__ = __repr__

    @staticmethod
    def _gap(value, low, high):
        """Return how far `value` lies outside [low, high] (0 when inside)."""
        if value < low:
            return low - value
        if value > high:
            return value - high
        return 0

    def distance(self, point):
        """Return the Manhattan distance from point [x, y] to this rectangle.

        The result is 0 when the point lies inside or on the rectangle.
        """
        return (self._gap(point[0], self.x_min, self.x_max)
                + self._gap(point[1], self.y_min, self.y_max))

    def distance_math(self, point):
        """Return the Euclidean distance from point [x, y] to this rectangle.

        The result is 0.0 when the point lies inside or on the rectangle.
        """
        dx = self._gap(point[0], self.x_min, self.x_max)
        dy = self._gap(point[1], self.y_min, self.y_max)
        return math.sqrt(dx * dx + dy * dy)

    def center(self):
        """Return the centre of the rectangle as [x, y]."""
        return [(self.x_min + self.x_max) / 2, (self.y_min + self.y_max) / 2]

def line_to_rect(line):
    """Return the bounding Rect of a contour.

    line -- iterable of contour points in the nested layout used by the
            callers here, i.e. point[0] is the (x, y) pair.

    An empty contour yields Rect(-1, -1, -1, -1), as before.  The bounds are
    computed with min()/max() rather than a -1 "not yet set" marker, so a
    genuine coordinate of -1 can no longer reset the bounds half way through.
    """
    xs = [point[0][0] for point in line]
    ys = [point[0][1] for point in line]
    if not xs:
        return Rect(-1, -1, -1, -1)
    return Rect(min(xs), max(xs), min(ys), max(ys))

def remove_non_rect(width, height, contours):
    """Return the bounding Rects of the contours that look like solid boxes.

    A contour is rejected when
      * its bounding box comes within 20 px of the image border,
      * its bounding box is less than 15 px wide or high, or
      * any of its points lies more than 10 px inside the bounding box
        (the outline does not hug the box, so it is not rectangular).
    """
    border = 20
    min_side = 15
    inset = 10

    kept = []
    for contour in contours:
        box = line_to_rect(contour)

        near_border = (box.x_min < border or box.y_min < border
                       or box.x_max > width - border
                       or box.y_max > height - border)
        if near_border:
            continue

        if min(box.x_max - box.x_min, box.y_max - box.y_min) < min_side:
            continue

        left, right = box.x_min + inset, box.x_max - inset
        top, bottom = box.y_min + inset, box.y_max - inset
        has_inner_point = any(
            left < pt[0][0] < right and top < pt[0][1] < bottom
            for pt in contour
        )
        if not has_inner_point:
            kept.append(box)
    return kept

def read_file(filename):
    """Load the image at `filename` with cv2.imread and return it.

    Raises IOError when the file cannot be read.  cv2.imread signals failure
    by returning None instead of raising, which would otherwise only surface
    later as an obscure error in the colour conversion.
    """
    img = cv2.imread(filename)
    if img is None:
        raise IOError("cannot read image file: %s" % filename)
    return img

def process_img(img):
    """Detect candidate marker rectangles in a BGR image.

    Returns a tuple (img, rects): the unmodified input image and the list of
    Rect objects that survive remove_non_rect().
    """
    grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Inverse threshold: pixels darker than 80 become white (foreground).
    _, binary = cv2.threshold(grey, 80, 255, cv2.THRESH_BINARY_INV)

    # Erode then dilate (default kernel) to remove small specks.
    binary = cv2.erode(binary, None)
    binary = cv2.dilate(binary, None)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x and 4.x; the contours are always the
    # second-to-last item.
    contours = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    rows, cols = img.shape[:2]
    return img, remove_non_rect(cols, rows, contours)

def calculate_distance(width, height, rect):
    """Return rect.distance() to each of the four image corners.

    The result is ordered [top-left, top-right, bottom-left, bottom-right].
    """
    corners = ([0, 0], [width, 0], [0, height], [width, height])
    return [rect.distance(corner) for corner in corners]

def find_corner_points(width, height, rects):
    """Return the rectangle closest to each corner of the image.

    The result is a 4-item list in the order used by calculate_distance().
    On a tie the rectangle that comes first in `rects` wins.  When `rects`
    is empty all four entries are None.
    """
    nearest = [None, None, None, None]
    best = [None, None, None, None]
    for rect in rects:
        current = calculate_distance(width, height, rect)
        for corner, dist in enumerate(current):
            # The first rectangle seeds every corner; later ones must be
            # strictly closer to replace it.
            if nearest[corner] is None or dist < best[corner]:
                best[corner] = dist
                nearest[corner] = rect
    return nearest

def draw_rects(img, rects, output):
    """Write a debug image with all rectangles outlined to `output`.png.

    Every rectangle is drawn in green with its index next to it; the four
    corner rectangles are additionally outlined with a thicker border.
    The input image itself is left untouched.
    """
    print(img.shape)
    rows = img.shape[0]
    cols = img.shape[1]
    # Slicing an ndarray with [:] only creates a view, so drawing on it would
    # also modify the caller's image; take a real copy instead.
    canvas = img.copy()

    def corners_of(rect):
        # Plain ints: the coordinates may be NumPy scalars.
        return ((int(rect.x_min), int(rect.y_min)),
                (int(rect.x_max), int(rect.y_max)))

    # The drawing functions work in place; their return value is not used
    # because it differs between OpenCV versions.
    for index, rect in enumerate(rects):
        top_left, bottom_right = corners_of(rect)
        cv2.rectangle(canvas, top_left, bottom_right, (0, 255, 0), 3)
        cv2.putText(canvas, str(index),
                    (bottom_right[0] + 5, bottom_right[1] + 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 0), 3)

    if len(rects) > 0:
        for rect in find_corner_points(cols, rows, rects):
            top_left, bottom_right = corners_of(rect)
            cv2.rectangle(canvas, top_left, bottom_right, (0, 255, 255), 5)

    # For an on-screen preview, call cv2.imshow('hello', canvas) here.
    cv2.imwrite(output + ".png", canvas)
    # Kept from the original; only meaningful when the preview is enabled.
    cv2.waitKey(0)

def fix_image(img, rects):
    """Return a deskewed copy of `img` based on its corner markers.

    The centres of the top-left, top-right and bottom-left markers are mapped
    by an affine transform so that the top edge becomes horizontal and the
    left edge vertical, with the top-left centre staying in place.

    Raises ValueError when `rects` is empty.
    """
    rows, cols = img.shape[:2]

    tl, tr, bl = find_corner_points(cols, rows, rects)[:3]
    if tl is None:
        raise ValueError("no positioning rectangles found; cannot fix image")

    tl_c = tl.center()
    tr_c = tr.center()
    bl_c = bl.center()

    # Edge lengths are measured centre to centre so the transform keeps the
    # scale.  Measuring from the top-left *rectangle* (as Rect.distance_math
    # does) comes out short by roughly half a marker and shrinks the image.
    top_len = math.hypot(tr_c[0] - tl_c[0], tr_c[1] - tl_c[1])
    left_len = math.hypot(bl_c[0] - tl_c[0], bl_c[1] - tl_c[1])

    pts1 = np.float32([tl_c, tr_c, bl_c])
    pts2 = np.float32([tl_c,
                       [tl_c[0] + top_len, tl_c[1]],
                       [tl_c[0], tl_c[1] + left_len]])
    M = cv2.getAffineTransform(pts1, pts2)
    return cv2.warpAffine(img, M, (cols, rows))

def remove_rects(rects, x_min, x_max, y_min, y_max):
    """Return the rectangles that have no corner inside the given region.

    A rectangle is dropped when its (x_max, y_max) corner or its
    (x_min, y_min) corner lies strictly inside the open region
    x_min < x < x_max, y_min < y < y_max.  The input order is preserved.
    """
    def inside(x, y):
        return x_min < x < x_max and y_min < y < y_max

    return [
        rect for rect in rects
        if not (inside(rect.x_max, rect.y_max)
                or inside(rect.x_min, rect.y_min))
    ]

2 评论

发表评论

电子邮件地址不会被公开。 必填项已用*标注