Gradient descent cost function blows up after reaching the minimum value


I'm working on a multivariate optimization problem using the gradient descent algorithm. The algorithm does an okay job, but I noticed that the cost function does not follow the monotonically descending trend I would expect.

It blows up right after reaching the minimum value.

My suspicion is that this has to do with the step size being too big. My dilemma is that the algorithm will be slow if the step size is too small, but it will step out of the solution space if it is too big.

Can someone provide some advice on what would be a preferred approach? Any suggestion is greatly appreciated.
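
For reference, the kind of decaying gain schedule I have been considering looks roughly like the sketch below. It mirrors the a_k / c_k lines currently commented out in my code; gain_schedule is just a hypothetical helper, and a, c, and A are tuning constants I would still have to choose.

def gain_schedule(k, a, c, A, alpha_val, gamma_val):
    # decaying gains: the step size and the perturbation size both shrink as
    # the iteration count k grows, so early steps are large and later steps
    # become conservative
    a_k = a / (k + 1 + A) ** alpha_val   # step size at iteration k
    c_k = c / (k + 1) ** gamma_val       # perturbation size at iteration k
    return a_k, c_k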

Here is my implementation of the optimizer part:

import numpy as np

# image_entropy and zernike_plane are helper functions defined elsewhere in my code

class spdg:
    def __init__(self, loss_function,
                 # a, c,
                 alpha_val,
                 gamma_val, max_iter, img_target, zernike,
                 momentum = 0.2,
                 cal_tolerance=1e-6):
        # Initialize gain parameters and decay factors
        # self.a = a
        # self.c = c
        self.alpha_val = alpha_val
        self.gamma_val = gamma_val

        self.loss = loss_function

        self.max_iter = max_iter
        self.A = max_iter / 10
        self.img_target = img_target
        self.zernike = zernike

        self.initial_entropy = image_entropy(self.img_target)
        self.momentum = momentum
        self.cal_tolerance = cal_tolerance

    def calc_loss(self, current_Aw):
        """Evaluate the cost/loss function for the current coefficients."""
        # print('current_Aw: %s' % str(current_Aw))
        Az = self.update_AZ(current_Aw)
        # print('Az: %s' % str(Az))
        return self.loss(Az, self.img_target)

    def update_AZ(self, current_Aw):
        return zernike_plane(current_Aw, self.zernike)

    def minimise(self, current_Aw, optimizer_type='vanilla', verbose=False):
        k = 0  # initialize count
        cost_func_val = []
        Aw_values = []
        vk = 0

        # start from an "infinitely far" previous estimate so the first
        # iteration always runs, even when current_Aw is initialised to zeros
        previous_Aw = np.full(current_Aw.shape, np.inf)

        while k < self.max_iter and \
                np.linalg.norm(previous_Aw - current_Aw) > self.cal_tolerance:

            previous_Aw = current_Aw
            cost_val = self.calc_loss(current_Aw)

            # get the current values for gain sequences
            # a_k = self.a / (k + 1 + self.A) ** self.alpha_val
            # c_k = self.c / (k + 1) ** self.gamma_val
            if verbose:
                print('iteration %d: %s with cost function value %.2f'
                      % (k, str(current_Aw), cost_val))

            # get the random perturbation vector Bernoulli distribution with p=0.5
            delta = (np.random.randint(0, 2, current_Aw.shape) * 2 - 1) * self.gamma_val


            Aw_plus = current_Aw + delta
            Aw_minus = current_Aw - delta

            # measure the loss function at perturbations
            loss_plus = self.calc_loss(Aw_plus)
            loss_minus = self.calc_loss(Aw_minus)

            loss_delta = (loss_plus - loss_minus)
            # Aw_delta = Aw_plus - Aw_minus

            # compute the estimate of the gradient
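            # note: since every entry of delta is +/-gamma_val, multiplying by
            # delta is proportional to dividing by it (the usual SPSA form
            # (loss_plus - loss_minus) / (2 * delta)); the constant
            # 2 * gamma_val**2 factor simply gets absorbed into alpha_val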
            g_hat = loss_delta * delta

            # update the estimate of the parameter
            if optimizer_type == 'vanilla':
                current_Aw = current_Aw - self.alpha_val * g_hat

            elif optimizer_type == 'momentum':
                vk_next = self.alpha_val * g_hat + self.momentum * vk
                current_Aw = current_Aw - vk_next
                vk = vk_next

            else:
                raise ValueError('unknown optimizer_type: %s' % optimizer_type)

            cost_val = self.calc_loss(current_Aw)
            cost_func_val.append(cost_val.squeeze())
            Aw_values.append(current_Aw)

            k += 1

        sol_idx = np.argmin(cost_func_val)
        Aw_estimate = Aw_values[sol_idx]
        print('optimal solution found at iteration %d' % sol_idx)

        return Aw_estimate, cost_func_val
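
For context, this is roughly how I drive the optimizer. my_loss, img_target, zernike_basis, and n_modes are placeholders for objects produced elsewhere in my pipeline.

# rough usage sketch; the loss function, target image, and Zernike basis
# come from the rest of my pipeline and are only placeholders here
optimizer = spdg(loss_function=my_loss,
                 alpha_val=1e-3,     # step size gain
                 gamma_val=1e-2,     # perturbation size
                 max_iter=500,
                 img_target=img_target,
                 zernike=zernike_basis)

Aw0 = np.zeros(n_modes)              # initial Zernike coefficients
Aw_best, cost_history = optimizer.minimise(Aw0,
                                           optimizer_type='momentum',
                                           verbose=True)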