Operator SDK controller failed to update Custom Resource status

3.2k views Asked by At

I'm following this tutorial to create my first Custom Resource named PodSet and currently at step 6 of 7 to test my CR.

Here is my Operator SDK controller Go code:

package controllers

import (
    "context"
    "reflect"

    "github.com/go-logr/logr"
    "k8s.io/apimachinery/pkg/labels"
    "k8s.io/apimachinery/pkg/runtime"
    ctrl "sigs.k8s.io/controller-runtime"
    "sigs.k8s.io/controller-runtime/pkg/client"
    "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    "sigs.k8s.io/controller-runtime/pkg/reconcile"

    appv1alpha1 "github.com/redhat/podset-operator/api/v1alpha1"
    corev1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// PodSetReconciler reconciles a PodSet object
type PodSetReconciler struct {
    client.Client
    Log    logr.Logger
    Scheme *runtime.Scheme
}

// +kubebuilder:rbac:groups=app.example.com,resources=podsets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=app.example.com,resources=podsets/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=v1,resources=pods,verbs=get;list;watch;create;update;patch;delete

// Reconcile is the core logic of controller
func (r *PodSetReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
    _ = context.Background()
    _ = r.Log.WithValues("podset", req.NamespacedName)

    // Fetch the PodSet instance (the parent of the pods)
    instance := &appv1alpha1.PodSet{}
    err := r.Get(context.Background(), req.NamespacedName, instance)
    if err != nil {
        if errors.IsNotFound(err) {
            // Request object not found, could have been deleted after reconcile request.
            // Owned objects are automatically garbage collected. For additional cleanup logic use finalizers.
            // Return and don't requeue
            return reconcile.Result{}, nil
        }
        // Error reading the object - requeue the request
        return reconcile.Result{}, err
    }

    // List all pods owned by this PodSet instance
    podSet := instance
    podList := &corev1.PodList{}
    labelz := map[string]string{
        "app":     podSet.Name, // the metadata.name field from user's CR PodSet YAML file
        "version": "v0.1",
    }
    labelSelector := labels.SelectorFromSet(labelz)
    listOpts := &client.ListOptions{Namespace: podSet.Namespace, LabelSelector: labelSelector}
    if err = r.List(context.Background(), podList, listOpts); err != nil {
        return reconcile.Result{}, err
    }

    // Count the pods that are pending or running and add them to available array
    var available []corev1.Pod
    for _, pod := range podList.Items {
        if pod.ObjectMeta.DeletionTimestamp != nil {
            continue
        }
        if pod.Status.Phase == corev1.PodRunning || pod.Status.Phase == corev1.PodPending {
            available = append(available, pod)
        }
    }
    numAvailable := int32(len(available))
    availableNames := []string{}
    for _, pod := range available {
        availableNames = append(availableNames, pod.ObjectMeta.Name)
    }

    // Update the status if necessary
    status := appv1alpha1.PodSetStatus{
        PodNames:          availableNames,
        AvailableReplicas: numAvailable,
    }
    if !reflect.DeepEqual(podSet.Status, status) {
        podSet.Status = status
        err = r.Status().Update(context.Background(), podSet)
        if err != nil {
            r.Log.Error(err, "Failed to update PodSet status")
            return reconcile.Result{}, err
        }
    }

    // When the number of pods in the cluster is bigger that what we want, scale down
    if numAvailable > podSet.Spec.Replicas {
        r.Log.Info("Scaling down pods", "Currently available", numAvailable, "Required replicas", podSet.Spec.Replicas)
        diff := numAvailable - podSet.Spec.Replicas
        toDeletePods := available[:diff] // Syntax help: https://play.golang.org/p/SHAMCdd12sp
        for _, toDeletePod := range toDeletePods {
            err = r.Delete(context.Background(), &toDeletePod)
            if err != nil {
                r.Log.Error(err, "Failed to delete pod", "pod.name", toDeletePod.Name)
                return reconcile.Result{}, err
            }
        }
        return reconcile.Result{Requeue: true}, nil
    }

    // When the number of pods in the cluster is smaller that what we want, scale up
    if numAvailable < podSet.Spec.Replicas {
        r.Log.Info("Scaling up pods", "Currently available", numAvailable, "Required replicas", podSet.Spec.Replicas)
        // Define a new Pod object
        pod := newPodForCR(podSet)
        // Set PodSet instance as the owner of the Pod
        if err := controllerutil.SetControllerReference(podSet, pod, r.Scheme); err != nil {
            return reconcile.Result{}, err
        }
        err = r.Create(context.Background(), pod)
        if err != nil {
            r.Log.Error(err, "Failed to create pod", "pod.name", pod.Name)
            return reconcile.Result{}, err
        }
        return reconcile.Result{Requeue: true}, nil
    }

    return ctrl.Result{}, nil
}

// newPodForCR returns a busybox pod with the same name/namespace as the cr
func newPodForCR(cr *appv1alpha1.PodSet) *corev1.Pod {
    labels := map[string]string{
        "app":     cr.Name, // the metadata.name field from user's CR PodSet YAML file
        "version": "v0.1",
    }
    return &corev1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            GenerateName: cr.Name + "-pod",
            Namespace:    cr.Namespace,
            Labels:       labels,
        },
        Spec: corev1.PodSpec{
            Containers: []corev1.Container{
                {
                    Name:    "busybox",
                    Image:   "busybox",
                    Command: []string{"sleep", "3600"},
                },
            },
        },
    }
}

// SetupWithManager defines how the controller will watch for resources
func (r *PodSetReconciler) SetupWithManager(mgr ctrl.Manager) error {
    return ctrl.NewControllerManagedBy(mgr).
        For(&appv1alpha1.PodSet{}).
        Owns(&corev1.Pod{}).
        Complete(r)
}

When I apply below YAML file, I saw strange behaviour of the pods. They were struggling in the first few seconds — some of them get up and running for a while and quickly get into terminating state. When I leave them untouched for few more seconds, the CR reached the desired state just fine.

apiVersion: app.example.com/v1alpha1
kind: PodSet
metadata:
  name: podset-sample
spec:
  replicas: 5

I captured the deployment scene above in this video. And here are the full logs from my local terminal running WATCH_NAMESPACE=podset-operator make run command (sorry, I have to use Pastebin because SO didn't allow me to paste the full logs here because they are too long).

So, my questions here are:

  1. What does the Failed to update PodSet status {"error": "Operation cannot be fulfilled on podsets.app.example.com \"podset-sample\": the object has been modified; please apply your changes to the latest version and try again"} actually means?
  2. Why this happened?
  3. What can I do to get rid of these errors?
2

There are 2 answers

3
Muhammad Rehan Javed On

You can also use patch instead of update to avoid this.

1
Alejandro Jesus Nuñez Madrazo On

You need get the object before update, that happens because you have old version of the object when you try to update.

EDIT:

podSet := &appv1alpha1.PodSet{}
err := r.Get(context.Background(), req.NamespacedName, podSet)
if err != nil {
  return reconcile.Result{}, err
}

// Update the status if necessary
status := appv1alpha1.PodSetStatus{
    PodNames:          availableNames,
    AvailableReplicas: numAvailable,
}
if !reflect.DeepEqual(podSet.Status, status) {
   podSet.Status = status
   err = r.Status().Update(context.Background(), podSet)
   if err != nil {
      r.Log.Error(err, "Failed to update PodSet status")
      return reconcile.Result{}, err
  }
}

you have to bring the latest version of the object from kubernetes to make sure you have the latest version of it