Dekker's method for numerical analysis not converging correctly


I am trying to implement Dekker's method for root finding, but I can't figure out why the algorithm does not converge correctly to a solution and sometimes stops abruptly.

My implementation is based on the equations given in the Wikipedia article on Dekker's method.
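
For reference, the update rules described there are roughly:

    s_k = b_k - f(b_k) * (b_k - b_{k-1}) / (f(b_k) - f(b_{k-1}))    (secant step)
    m_k = (a_k + b_k) / 2                                            (bisection midpoint)
    b_{k+1} = s_k if s_k lies between b_k and m_k, otherwise m_k
    a_{k+1} = whichever of a_k, b_k still gives a sign change with b_{k+1},
              and a_{k+1}, b_{k+1} are swapped if |f(a_{k+1})| < |f(b_{k+1})|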

I'm quite new to python and programming and would appreciate any insight into where I might be going wrong.

def dekker(func, x_bounds, tolerance, max_iterations):
    # Initial bounds
    a0, b0 = x_bounds
    bk = b0
    b_minus_1 = a0 # previous bk but start with a
    
    k = 0

    print(' k xk        f(xk)')

    while k < max_iterations:
        fk = eval(func)(bk)
        f_minus_1 = eval(func)(b_minus_1)

        # check for division by zero when attempting secant method
        if fk - f_minus_1 == 0:
            # implement bisection method when division by zero 
            bk_plus_1 = (a0 + bk) / 2
        else:
            sk = bk - (bk - b_minus_1) / (fk - f_minus_1) * fk #secant
            mk = (a0 + bk) / 2 # bisection 
            
            if sk >= mk and sk <= bk:
                bk_plus_1 = sk
            else:
                bk_plus_1 = mk

        fk_plus_1 = eval(func)(bk_plus_1)

        if fk * fk_plus_1 < 0:
            a_k_plus_1 = bk
            b_k_plus_1 = bk_plus_1
        else:
            a_k_plus_1 = a0
            b_k_plus_1 = bk_plus_1

        if abs(eval(func)(a_k_plus_1)) < abs(eval(func)(b_k_plus_1)):
            best_solution = a_k_plus_1
        else:
            best_solution = b_k_plus_1

        k += 1
        print('{0:2.0f}  {1:2.8f}  {2:2.2e}'.format(k, bk_plus_1, abs(fk_plus_1)))

        if(abs(bk - b_minus_1) < tolerance):
            print('Converged')
            break

        bk = bk_plus_1
        b_minus_1 = bk

    if k == max_iterations:
        print('Not converged')

Testing method:

def function(x):
    return -np.cos(x) + x*x*x + 2*x*x + 1
dekker('function', [-3,1], 1e-6, 100)

There are 3 answers

Answer by Tim Roberts

You had a number of problems.

First, you were only computing mk when you used the bisection method. You need to compute mk on EVERY iteration, because you need to compare sk and mk every time.

Next, your two lines of code to advance bk and b_minus_1 at the end were not in the right order, so you trashed the bk value before saving it.

Next, you were doing the computation to check whether you needed to move ak, but you never used the results of that computation. This seems to work:

import numpy as np

def dekker(func, x_bounds, tolerance, max_iterations):
    ak, bk = x_bounds
    b_minus_1 = ak # previous bk but start with a
    
    print(' k xk        f(xk)')

    for k in range(max_iterations):
        fk = func(bk)
        f_minus_1 = func(b_minus_1)

        # check for division by zero when attempting secant method

        mk = (ak + bk) / 2 # bisection 
        if fk == f_minus_1:
            bk_plus_1 = mk
        else:
            sk = bk - (bk - b_minus_1) / (fk - f_minus_1) * fk #secant
            
            if mk <= sk <= bk or bk <= sk <= mk:
                bk_plus_1 = sk
            else:
                bk_plus_1 = mk

        fk_plus_1 = func(bk_plus_1)

        b_minus_1 = bk

        if fk * fk_plus_1 < 0:
            ak = bk

        bk = bk_plus_1

        if abs(func(ak)) < abs(func(bk)):
            best_solution = ak
        else:
            best_solution = bk

        print('{0:2.0f}  {1:2.8f}  {2:2.2e}'.format(k, bk_plus_1, abs(fk_plus_1)), best_solution)

        if(abs(fk - fk_plus_1) < tolerance):
            print('Converged', bk)
            return bk

    print('Not converged')
    return None


def function(x):
    return -np.cos(x) + x*x*x + 2*x*x + 1

def plotme():
    import matplotlib.pyplot as plt
    x = np.arange( -3, 1, .01 )
    y = function(x)
    plt.plot(x,y)
    plt.show()

#plotme()
print(dekker(function, [-3,1], 1e-6, 100))

Output:

 k xk        f(xk)
 0  -0.32179374  2.25e-01 -0.3217937387377827
 1  -0.41378380  3.56e-01 -0.41378379538544446
 2  -1.70689190  1.99e+00 -1.7068918976927223
 3  -2.35344595  2.52e-01 -2.3534459488463613
 4  -2.28064072  1.92e-01 -2.280640719265942
 5  -2.31209175  6.87e-03 -2.3120917484530077
 6  -2.31325946  2.00e-04 -2.313259456426466
 7  -2.31322651  1.97e-07 -2.313226508522713
 8  -2.31322654  5.65e-12 -2.313226541057924
Converged -2.313226541057924
-2.313226541057924
Answer by Malcolm

Here is a working version.

def dekker(fnon, x_bounds, tolerance, max_iterations):
    # Initial bounds
    a0, b0 = x_bounds
    ak = a0
    bk = b0
    bk_minus_1 = a0 # previous bk but start with a
    
    k = 0

    print(' k xk        f(xk)')

    while k < max_iterations:
        fk = fnon(bk)
        fk_minus_1 = fnon(bk_minus_1)

        # check for division by zero when attempting secant method
        if fk - fk_minus_1 == 0:
            # implement bisection method when division by zero 
            bk_plus_1 = (a0 + bk) / 2
        else:
            sk = bk - (bk - bk_minus_1) / (fk - fk_minus_1) * fk #secant
            mk = (a0 + bk) / 2 # bisection 

            if sk >= mk and sk <= bk:
                bk_plus_1 = sk
            else:
                bk_plus_1 = mk

        fk_plus_1 = fnon(bk_plus_1)

        if fk * fk_plus_1 < 0:
            ak_plus_1 = bk
            bk_plus_1 = bk_plus_1
        else:
            ak_plus_1 = ak
            bk_plus_1 = bk_plus_1

        if abs(fnon(ak_plus_1)) < abs(fnon(bk_plus_1)):
            best_solution = ak_plus_1
        else:
            best_solution = bk_plus_1

        print('{0:2.0f}  {1:2.8f}  {2:2.2e}'.format(k, bk_plus_1, abs(fk_plus_1)))

        if(abs(bk - bk_plus_1) < tolerance):
            print('Converged')
            break

        bk_minus_1 = bk
        bk = bk_plus_1
        ak = ak_plus_1
        k += 1

    if k == max_iterations:
        print('Not converged')

And some validation

def f2(x):
    return x*x - 4
dekker(f2, [1,4], 1e-6, 100)

prints

 k xk        f(xk)
 1  2.50000000  2.25e+00
 2  2.15384615  6.39e-01
 3  2.01652893  6.64e-02
 4  2.00060976  2.44e-03
 5  2.00000251  1.00e-05
 6  2.00000000  1.53e-09
 7  2.00000000  1.78e-15
Converged

There were a number of issues not previously mentioned, but one important problem is your reinitialization at the end of the loop. If you set bk = bk_plus_1 and then bk_minus_1 = bk, then bk_minus_1 ends up equal to bk_plus_1 as well, and the old value of bk that bk_minus_1 was supposed to receive is lost. You also haven't updated ak from your calculated ak_plus_1.
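
To see just that ordering pitfall in isolation (toy values, not part of the code above):

bk_minus_1, bk, bk_plus_1 = -3.0, 1.0, -0.32   # example values only

# Wrong order: bk is overwritten before it is saved, so bk_minus_1
# ends up equal to bk_plus_1 and the old bk is lost:
#     bk = bk_plus_1
#     bk_minus_1 = bk

# Correct: save the old bk first ...
bk_minus_1 = bk
bk = bk_plus_1
# ... or update both at once with a tuple assignment:
#     bk_minus_1, bk = bk, bk_plus_1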

There are a number of further improvements you might consider. For example, you should have the method return the xk value you have found. That way the results of your function can be used in further calculations.
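
For example, if the Converged branch is changed to return bk_plus_1 (and the loop exhaustion case to return None), the result can be reused directly. A sketch, assuming that modification:

root = dekker(f2, [1, 4], 1e-6, 100)                # assumes dekker now returns the root
if root is not None:
    print('residual at returned root:', f2(root))   # use the result in further calculations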

Answer by Lutz Lehmann

Sources

There are several sources for what Dekker's algorithm could be. I use the variant referenced in

which uses the algorithm from

  • T. J. Dekker and W. Hoffmann (1968), "Algol 60 Procedures in Numerical Algebra"

and also cites a conference report

  • Dekker (1969), "Finding a zero by means of successive linear interpolation"

The root-finding algorithm there, which mixes secant and bisection steps, is also discussed as algorithm A in

  • Bus, Dekker (1975), "Two Efficient Algorithms with Guaranteed Convergence for Finding a Zero of a Function"

The main thrust of that article is to discuss modifications of the basic idea using roots of non-linear approximations of the function as iteration points.

Wikipedia's Brent article only refers to the conference report Dekker (1969) and other reviews from the perspective of the Brent algorithm. So for Dekker alone it is a dubious source, as both sources are likely to have simplified the algorithm to its basic idea.

The algorithm

Extending bisection and other bracketing methods, the state of Dekker's method consists of 3 points a,b,c. Their roles: from a,b the next secant root is computed, and [b,c] is a bracketing interval, that is, the values at b,c have different signs, ensuring the presence of a root in the interval. Additionally, b is the "best" point, the one with the smallest absolute function value among the three. The algorithm enforces that the new point is inside [b,c], closer to b.

At the start of the method, the interval [a,b] is given and extended to a "triangle" [a,b,c] = [a,b,a]. The points get sorted so that b has the smaller absolute function value. After selecting the next point v inside [b,c], the secant sequence gets shifted, a,b = b,v. Then the new bracketing interval gets selected. The sign sequence of the values at the new a,b,c has one alternation. If it is again between b and c, nothing needs to be done and the sequence a,b,c remains "linear". Otherwise the alternation is between a and b, so set c=a as the new counter-point.
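
A condensed sketch of this bookkeeping in plain Python (an illustration only, not the code from the cited papers; update_state is a made-up helper name, and the full trace-printing code appears further below):

def update_state(f, a, b, c, v):
    # shift the secant sequence on to the newly selected point v
    a, b = b, v
    # if the sign change is no longer between b and c, the former best
    # point (now a) becomes the new counter-point: "triangle" formation
    if f(b) * f(c) > 0:
        c = a
    # keep b the point with the smaller |f| by swapping it with c if needed
    if abs(f(c)) < abs(f(b)):
        a, b, c = b, c, b
    return a, b, c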

The selection of the next point v has 3 variants (a condensed code sketch follows the list):

  • In "secant" mode, the secant root v=s is inside of the bracketing interval, moreover between the midpoint m and b. In the majority of situations, the value at s has the same sign as the value at b and a significantly smaller size, so that the other interval end c remains unchanged and the secant root sequence can be continued without break, a,b = b,s. A sign change between the values at b and v=s is less likely, but still quite possible.

  • In "midpoint" mode it was detected that the secant root sequence diverges from the bracketing interval, either falling outside or making an unexpected large step away from b to the other side of m. In this case it is more likely that the value of the midpoint is not smaller than the previous values, or that the sign changes between the values at b and v=m

  • a "minimal" step is performed to resolve numerically ambiguous situations. For instance if the secant root sequence converges to step sizes below the tolerances, one or several minimal steps are used to also shrink the bracketing interval to this minimal size. At convergence at a multiple root where there is no sign change, which is numerically ambiguous, one or more minimal steps will move this root outside the bracketing interval and restart the search for a root with a sign change.

Applied to the test problem

The trace of the Dekker method for this equation should look like

a=x[-1], b=x[-1], c=x[-1] -> initial   : x[ 0] =  -3.000000000000000,  f(x[ 0]) =  -7.010007503399553
a=x[ 0], b=x[-1], c=x[-1] -> initial   : x[ 1] =   1.000000000000000,  f(x[ 1]) =   3.459697694131860
a=x[ 0], b=x[ 1], c=x[ 0] -> secant    : x[ 2] =  -0.321793738737783,  f(x[ 2]) =   0.225110648393170
a=x[ 1], b=x[ 2], c=x[ 0] -> secant    : x[ 3] =  -0.413783795385445,  f(x[ 3]) =   0.355981221390157
a=x[ 2], b=x[ 3], c=x[ 0] -> midpoint  : x[ 4] =  -1.706891897692722,  f(x[ 4]) =   1.989640412053239
a=x[ 3], b=x[ 4], c=x[ 0] -> midpoint  : x[ 5] =  -2.353445948846361,  f(x[ 5]) =  -0.252473245321568
a=x[ 4], b=x[ 5], c=x[ 4] -> secant    : x[ 6] =  -2.280640719265942,  f(x[ 6]) =   0.192012983736426
a=x[ 5], b=x[ 6], c=x[ 5] -> secant    : x[ 7] =  -2.312091748453008,  f(x[ 7]) =   0.006873812772492
a=x[ 6], b=x[ 7], c=x[ 5] -> secant    : x[ 8] =  -2.313259456426466,  f(x[ 8]) =  -0.000199582032968
a=x[ 7], b=x[ 8], c=x[ 7] -> secant    : x[ 9] =  -2.313226508522713,  f(x[ 9]) =   0.000000197276950
a=x[ 8], b=x[ 9], c=x[ 8] -> secant    : x[10] =  -2.313226541057924,  f(x[10]) =   0.000000000005652
a=x[ 9], b=x[10], c=x[ 8] -> minimal   : x[11] =  -2.313226541058924,  f(x[11]) =  -0.000000000000412
exit by interval length
a=x[10], b=x[11], c=x[10] -> best value: x[11] =  -2.313226541058924,  f(x[11]) =  -0.000000000000412

The use of a,b,c is taken from the Dekker/Bus paper. Here c is the counter-point with value of opposite sign to the current best point b, and a is usually the former best point if the last step was a secant step.

Note the regular sign switch in the function value which indicates a substantial shrinking of the bracketing interval [b,c].


The code, with all the facilities to print out the trace of the algorithm, is

from math import sin, cos, exp

def dekker(f_in, a, b, tol=1e-7):
    mode = "initial"
    A=B=C=None
    
    class Point(object):
        k=0
        def __init__(self, x):
            self.x = x
            self.k = Point.k
            self.f = f_in(x); 
            a_str = "       " if not A else f"a=x[{A.k:2d}]"
            b_str = "       " if not B else f"b=x[{B.k:2d}]"
            c_str = "       " if not C else f"c=x[{C.k:2d}]"
            print(f"{a_str}, {b_str}, {c_str} -> {mode:10s}:", end='')
            print(f" x[{self.k:2d}] = {self.x:19.15f},  f(x[{self.k:2d}]) = {self.f:19.15f}"); 
            Point.k+=1; 
 
    A = Point(a)
    B = Point(b)
    if A.f*B.f > 0: 
        error("need sign change in initial interval")
    # [a,b] is bracketing interval, establish triangle formation, [b,c] now also bracketing
    C = A
    while 1>0:
        # make b the best. In superlinear mode nothing should happen here
        if abs(C.f) < abs(B.f): 
            A, B, C = B, C, B    # triangle formation
        # atomic step size
        db = tol
        if abs(C.x-B.x)<2*db: print("exit by interval length"); break
        # minimal change in case of stalling
        h = B.x + (db if C.x>B.x else -db);
        # elements of the secant step
        p = B.f*(A.x-B.x); q = (B.f-A.f);
        if p<0: p,q = -p,-q
        # default for midpoint step
        v = m = 0.5*(B.x+C.x); mode = "midpoint"
        # secant step if step update is large enough
        if p > abs(db*q):
            s = B.x+p/q
            # step goes not beyond midpoint, else remain at midpoint
            if p < q*(v-B.x): v = s; mode = "secant"
        else: v=h; mode = "minimal"
        # shift the secant sequence
        A, B = B, Point(v);
        # Now a < b=v <= m < c or in the other direction. Between a and c there is a sign change
        # If the sign also changes between b and c, do nothing, linear formation remains.
        # Else copy a to c, establishing a triangle formation
        if B.f*C.f > 0:
            C = A
        # Now [b,c] is ensured to be bracketing and (a,b) is ready for the next secant root computation
    mode = "best value"
    Point.k = B.k; Point(B.x);    # this is just for some nice printing
    return B.x, C.x
        
dekker(lambda x: -cos(x) + x*x*x + 2*x*x + 1, -3, 1, tol=1e-12)