I want to integrate Farneback's optical flow in my framework and I started with a Python prototype.
I tried to follow the steps described in this paper and I compared to OpenCV's output, but I get far worse results.
Here is how I proceeded:
I have a multiscale approach, with an initial guess dx = dy = 0 by default for each pixel,
I estimate the optical flow at the current scale. Just like OpenCV, I perform the estimation several times at each scale. The estimation consists of:
Second order polynomial expansion for each image, i.e. locally estimate the parameters A and b in the equation
for eah image
Combine the parameters of both images as suggested in the paper:
Compute
and
Average
and
in a neighbourhood
Solve the system :
Here is my complete code
import numpy as np
import math
import cv2
# --------------------------------------------------------------------------- #
def conjgrad(A, b, x0):
r = b - np.dot(A, x0)
p = r
rsold = np.dot(r.T, r)
x = x0
for i in range(len(b)):
Ap = np.dot(A, p)
alpha = rsold / (np.dot(p.T, Ap) + 1e-5)
x = x + alpha * p
r = r - alpha * Ap
rsnew = np.dot(r.T, r)
if math.sqrt(rsnew) < 1e-10:
break
p = r + (rsnew / rsold) * p
rsold = rsnew
return x
# --------------------------------------------------------------------------- #
def Farneback_estimParams(im, flowX=None, flowY=None):
sizeY, sizeX = im.shape
halfSize = 5
roiSize = 2*halfSize+1
nbNeighbours = roiSize*roiSize
A = np.zeros((4, sizeY, sizeX), dtype=np.float32)
B = np.zeros((2, sizeY, sizeX), dtype=np.float32)
for y in range(sizeY):
for x in range(sizeX):
dx = 0
dy = 0
if flowX is not None:
dx = flowX[y, x]
dy = flowY[y, x]
# Build the coordinate matrix:
# [1, x, y, x*x, y*y, x*y
# ... ]
# For the entire neighbourhood
a = np.ones((nbNeighbours, 6))
b = np.ones((nbNeighbours, 1))
i = 0
for oy in range(-halfSize, halfSize+1):
for ox in range(-halfSize, halfSize+1):
a[i, 0] = 1
a[i, 1] = ox
a[i, 2] = oy
a[i, 3] = ox*ox
a[i, 4] = oy*oy
a[i, 5] = ox*oy
curX = int(x + ox + dx)
curY = int(y+oy + dy)
if curX < 0 or curX >= sizeX or curY < 0 or curY >= sizeY:
b[i, 0] = 0
else:
b[i, 0] = im[curY, curX]
i += 1
# Solve a*p = b
ata = np.dot(a.T, a)
atb = np.dot(a.T, b)
p = conjgrad(ata, atb, np.zeros((6, 1)))
A[0, y, x] = p[3]
A[1, y, x] = p[4]/2
A[2, y, x] = p[4]/2
A[3, y, x] = p[5]
B[0, y, x] = p[0]
B[1, y, x] = p[1]
return A, B
# --------------------------------------------------------------------------- #
def Farneback(im1, im2, dx0, dy0):
nbIter = 10
sizeY, sizeX = im1.shape
# Create the output images
dx = dx0
dy = dy0
for curIter in range(nbIter):
# Estimate the parameters
A1, b1 = Farneback_estimParams(im1)
A2, b2 = Farneback_estimParams(im2, dx, dy)
# Combine the parameters
A = (A1+A2) / 0.5
Ad = np.zeros_like(b1)
Ad[0, :, :] = A[0, :, :]*dx + A[1, :, :]*dy
Ad[1, :, :] = A[2, :, :]*dx + A[3, :, :]*dy
deltaB = (b1-b2)/0.5 + Ad
# Compute A^T * A
ATA = np.zeros((4, sizeY, sizeX), dtype=np.float32)
ATA[0, :, :] = A[0, :, :]*A[0, :, :] + A[1, :, :]*A[2, :, :]
ATA[1, :, :] = A[0, :, :]*A[1, :, :] + A[1, :, :]*A[3, :, :]
ATA[2, :, :] = A[2, :, :]*A[0, :, :] + A[3, :, :]*A[2, :, :]
ATA[3, :, :] = A[2, :, :]*A[1, :, :] + A[3, :, :]*A[3, :, :]
# Compute A^T * B
ATB = np.zeros((2, sizeY, sizeX), dtype=np.float32)
ATB[0, :, :] = A[0, :, :] * deltaB[0, :, :] + A[2, :, :] * deltaB[1, :, :]
ATB[1, :, :] = A[1, :, :] * deltaB[0, :, :] + A[3, :, :] * deltaB[1, :, :]
# Smooth ATA and ATB
winSize = 15
for i in range(4):
ATA[i, :, :] = cv2.blur(ATA[i, :, :], (winSize, winSize))
if i < 2:
ATB[i, :, :] = cv2.blur(ATB[i, :, :], (winSize, winSize))
# For each pixel
for y in range(sizeY):
for x in range(sizeX):
# Build the matrix ATA and the vector ATB
ata = np.zeros((2, 2))
atb = np.zeros((2, 1))
ata[0, 0] = ATA[0, y, x]
ata[0, 1] = ATA[1, y, x]
ata[1, 0] = ATA[2, y, x]
ata[1, 1] = ATA[3, y, x]
atb[0] = ATB[0, y, x]
atb[1] = ATB[1, y, x]
# Solve the system
d = conjgrad(ata, atb, np.zeros((2, 1)))
dx[y, x] = d[0]
dy[y, x] = d[1]
return dx, dy
# --------------------------------------------------------------------------- #
def multiScaleFarneback(im1, im2, dx0=None, dy0=None):
# Multiscale parameters
pyrScale = 0.5
nbLevels = 2
# Initial displacement
sizeY, sizeX = im1.shape
if dx0 == None or dy0 == None:
dx0 = np.zeros_like(im1).astype(np.float32)
dy0 = np.zeros_like(im1).astype(np.float32)
# Create the list of the scales
scales = [1]
for i in range(0, nbLevels):
scales.append(scales[i]*pyrScale)
# For each level, from coarse to fine
for curLevel in range(nbLevels, -1, -1):
curScale = scales[curLevel]
print("Level", curLevel, ", Scale", curScale)
# Sub-sample the images
curSizeX = int(sizeX * curScale)
curSizeY = int(sizeY * curScale)
curIm1 = cv2.resize(im1, (curSizeY, curSizeX), interpolation=cv2.INTER_AREA).astype(np.float32)
curIm2 = cv2.resize(im2, (curSizeY, curSizeX), interpolation=cv2.INTER_AREA).astype(np.float32)
# Resample the displacement field
if curLevel == nbLevels:
dx_prev = cv2.resize(dx0, (curSizeY, curSizeX), interpolation=cv2.INTER_AREA)
dy_prev = cv2.resize(dy0, (curSizeY, curSizeX), interpolation=cv2.INTER_AREA)
dx_prev = dx_prev * curScale
dy_prev = dy_prev * curScale
else:
dx_prev = cv2.resize(dx, (curSizeY, curSizeX), interpolation=cv2.INTER_AREA)
dy_prev = cv2.resize(dy, (curSizeY, curSizeX), interpolation=cv2.INTER_AREA)
dx_prev = dx_prev / curScale
dy_prev = dy_prev / curScale
# Compute the optical flow at the current level, using the previous estimation
dx, dy = Farneback(curIm1, curIm2, dx_prev, dy_prev)
return dx, dy
I know there is a difference in the way the parameters A and b are calculated between my implementation and OpenCV's code. I just do not understand this step and the explanations in Farneback's PhD thesis so I coded a simple and naive non-weighted least squared calculation.
Is that so important or there is another mistake causing bad estimation?
This is how I set up the parameters on OpenCV:
flow = cv2.calcOpticalFlowFarneback(im1, im2, None, pyr_scale=0.5, levels=2, winsize=15, iterations=10, poly_n=5, poly_sigma=1.1, flags=0)
Thanks