Farneback's optical flow implementation

147 views Asked by At

I want to integrate Farneback's optical flow in my framework and I started with a Python prototype.

I tried to follow the steps described in this paper and I compared to OpenCV's output, but I get far worse results.

Here is how I proceeded:

  1. I have a multiscale approach, with an initial guess dx = dy = 0 by default for each pixel,

  2. I estimate the optical flow at the current scale. Just like OpenCV, I perform the estimation several times at each scale. The estimation consists of:

  • Second order polynomial expansion for each image, i.e. locally estimate the parameters A and b in the equation eq1 for eah image

  • Combine the parameters of both images as suggested in the paper:

eq2

  1. Compute eq3.1 and eq3.2

  2. Average eq3.1 and eq3.2 in a neighbourhood

  3. Solve the system : eq4

Here is my complete code

import numpy as np
import math
import cv2
# --------------------------------------------------------------------------- #
def conjgrad(A, b, x0):
    r = b - np.dot(A, x0)
    p = r
    rsold = np.dot(r.T, r)
    x = x0

    for i in range(len(b)):
        Ap = np.dot(A, p)
        alpha = rsold / (np.dot(p.T, Ap) + 1e-5)
        x = x + alpha * p
        r = r - alpha * Ap
        rsnew = np.dot(r.T, r)
        if math.sqrt(rsnew) < 1e-10:
            break
        p = r + (rsnew / rsold) * p
        rsold = rsnew
    return x

# --------------------------------------------------------------------------- #
def Farneback_estimParams(im, flowX=None, flowY=None):
    sizeY, sizeX = im.shape

    halfSize = 5
    roiSize = 2*halfSize+1
    nbNeighbours = roiSize*roiSize

    A = np.zeros((4, sizeY, sizeX), dtype=np.float32)
    B = np.zeros((2, sizeY, sizeX), dtype=np.float32)

    for y in range(sizeY):
        for x in range(sizeX):
            dx = 0
            dy = 0
            if flowX is not None:
                dx = flowX[y, x]
                dy = flowY[y, x]

            # Build the coordinate matrix:
            # [1, x, y, x*x, y*y, x*y
            #  ...                   ]
            # For the entire neighbourhood
            a = np.ones((nbNeighbours, 6))
            b = np.ones((nbNeighbours, 1))
            i = 0
            for oy in range(-halfSize, halfSize+1):
                for ox in range(-halfSize, halfSize+1):
                    a[i, 0] = 1
                    a[i, 1] = ox
                    a[i, 2] = oy
                    a[i, 3] = ox*ox
                    a[i, 4] = oy*oy
                    a[i, 5] = ox*oy

                    curX = int(x + ox + dx)
                    curY = int(y+oy + dy)
                    if curX < 0 or curX >= sizeX or curY < 0 or curY >= sizeY:
                        b[i, 0] = 0
                    else:
                        b[i, 0] = im[curY, curX]

                    i += 1

            # Solve a*p = b
            ata = np.dot(a.T, a)
            atb = np.dot(a.T, b)
            p = conjgrad(ata, atb, np.zeros((6, 1)))

            A[0, y, x] = p[3]
            A[1, y, x] = p[4]/2
            A[2, y, x] = p[4]/2
            A[3, y, x] = p[5]

            B[0, y, x] = p[0]
            B[1, y, x] = p[1]

    return A, B

# --------------------------------------------------------------------------- #
def Farneback(im1, im2, dx0, dy0):
    nbIter = 10

    sizeY, sizeX = im1.shape

    # Create the output images
    dx = dx0
    dy = dy0

    for curIter in range(nbIter):
        # Estimate the parameters
        A1, b1 = Farneback_estimParams(im1)
        A2, b2 = Farneback_estimParams(im2, dx, dy)

        # Combine the parameters
        A = (A1+A2) / 0.5

        Ad = np.zeros_like(b1)
        Ad[0, :, :] = A[0, :, :]*dx + A[1, :, :]*dy
        Ad[1, :, :] = A[2, :, :]*dx + A[3, :, :]*dy

        deltaB = (b1-b2)/0.5 + Ad

        # Compute A^T * A
        ATA = np.zeros((4, sizeY, sizeX), dtype=np.float32)

        ATA[0, :, :] = A[0, :, :]*A[0, :, :] + A[1, :, :]*A[2, :, :]
        ATA[1, :, :] = A[0, :, :]*A[1, :, :] + A[1, :, :]*A[3, :, :]
        ATA[2, :, :] = A[2, :, :]*A[0, :, :] + A[3, :, :]*A[2, :, :]
        ATA[3, :, :] = A[2, :, :]*A[1, :, :] + A[3, :, :]*A[3, :, :]

        # Compute A^T * B
        ATB = np.zeros((2, sizeY, sizeX), dtype=np.float32)
        ATB[0, :, :] = A[0, :, :] * deltaB[0, :, :] + A[2, :, :] * deltaB[1, :, :]
        ATB[1, :, :] = A[1, :, :] * deltaB[0, :, :] + A[3, :, :] * deltaB[1, :, :]

        # Smooth ATA and ATB
        winSize = 15
        for i in range(4):
            ATA[i, :, :] = cv2.blur(ATA[i, :, :], (winSize, winSize))
            if i < 2:
                ATB[i, :, :] = cv2.blur(ATB[i, :, :], (winSize, winSize))

        # For each pixel
        for y in range(sizeY):
            for x in range(sizeX):
                # Build the matrix ATA and the vector ATB
                ata = np.zeros((2, 2))
                atb = np.zeros((2, 1))

                ata[0, 0] = ATA[0, y, x]
                ata[0, 1] = ATA[1, y, x]
                ata[1, 0] = ATA[2, y, x]
                ata[1, 1] = ATA[3, y, x]

                atb[0] = ATB[0, y, x]
                atb[1] = ATB[1, y, x]

                # Solve the system
                d = conjgrad(ata, atb, np.zeros((2, 1)))

                dx[y, x] = d[0]
                dy[y, x] = d[1]

    return dx, dy

# --------------------------------------------------------------------------- #
def multiScaleFarneback(im1, im2, dx0=None, dy0=None):
    # Multiscale parameters
    pyrScale = 0.5
    nbLevels = 2

    # Initial displacement
    sizeY, sizeX = im1.shape
    if dx0 == None or dy0 == None:
        dx0 = np.zeros_like(im1).astype(np.float32)
        dy0 = np.zeros_like(im1).astype(np.float32)

    # Create the list of the scales
    scales = [1]
    for i in range(0, nbLevels):
        scales.append(scales[i]*pyrScale)

    # For each level, from coarse to fine
    for curLevel in range(nbLevels, -1, -1):
        curScale = scales[curLevel]
        print("Level", curLevel, ", Scale", curScale)

        # Sub-sample the images
        curSizeX = int(sizeX * curScale)
        curSizeY = int(sizeY * curScale)
        curIm1 = cv2.resize(im1, (curSizeY, curSizeX), interpolation=cv2.INTER_AREA).astype(np.float32)
        curIm2 = cv2.resize(im2, (curSizeY, curSizeX), interpolation=cv2.INTER_AREA).astype(np.float32)


        # Resample the displacement field
        if curLevel == nbLevels:
            dx_prev = cv2.resize(dx0, (curSizeY, curSizeX), interpolation=cv2.INTER_AREA)
            dy_prev = cv2.resize(dy0, (curSizeY, curSizeX), interpolation=cv2.INTER_AREA)

            dx_prev = dx_prev * curScale
            dy_prev = dy_prev * curScale
        else:
            dx_prev = cv2.resize(dx, (curSizeY, curSizeX), interpolation=cv2.INTER_AREA)
            dy_prev = cv2.resize(dy, (curSizeY, curSizeX), interpolation=cv2.INTER_AREA)
            dx_prev = dx_prev / curScale
            dy_prev = dy_prev / curScale

        # Compute the optical flow at the current level, using the previous estimation
        dx, dy = Farneback(curIm1, curIm2, dx_prev, dy_prev)

    return dx, dy

I know there is a difference in the way the parameters A and b are calculated between my implementation and OpenCV's code. I just do not understand this step and the explanations in Farneback's PhD thesis so I coded a simple and naive non-weighted least squared calculation.

Is that so important or there is another mistake causing bad estimation?

This is how I set up the parameters on OpenCV:

flow = cv2.calcOpticalFlowFarneback(im1, im2, None, pyr_scale=0.5, levels=2, winsize=15, iterations=10, poly_n=5, poly_sigma=1.1, flags=0)

Thanks

0

There are 0 answers