How to break a direct reference cycle in CPython

613 views Asked by At

In CPython I have two types of objects, which are closely connected to each other.

#include <Python.h>
#include <structmember.h>

typedef struct pyt PYT;

/*
 * Instance layout shared by the Pyt1 and Pyt2 types: the standard object
 * header followed by a single link to the partner object.
 */
struct pyt {
    PyObject_HEAD
    PYT *other;     /* partner object; released with Py_CLEAR in dealloc/clear */
};

/*
 * tp_dealloc shared by Pyt1 and Pyt2.
 *
 * Releases the reference to the partner object, logs the address of the
 * object being destroyed, and frees the GC-allocated memory.
 */
static void dealloc (PYT *self) {
    /* A GC-enabled type must leave the collector's tracking list before its
     * fields are torn down; otherwise a collection triggered while
     * self->other is being released could traverse a half-destroyed object.
     * The call is a no-op for an object that was never tracked. */
    PyObject_GC_UnTrack(self);
    Py_CLEAR(self->other);
    /* %p requires a void * argument. */
    printf("dealloc object at %p\n", (void *) self);
    PyObject_GC_Del(self);
}

/*
 * Type object for "pyt.Pyt2". Only the leading slots are set statically;
 * the flags, traverse/clear hooks and member table are assigned in initpyt.
 */
static PyTypeObject Pyt2Type = {
    PyObject_HEAD_INIT(NULL)
    0,                      /* ob_size */
    "pyt.Pyt2",             /* tp_name */
    sizeof(PYT),            /* tp_basicsize */
    0,                      /* tp_itemsize */
    (destructor) dealloc    /* tp_dealloc */
};

/*
 * tp_new for Pyt1: allocates an object of `type` together with a Pyt2
 * partner and links the two so that each holds a reference to the other.
 * `args` and `kwds` are not used.
 *
 * Returns the new object, or NULL if either allocation fails.
 */
static PyObject * new (PyTypeObject *type, PyObject *args, PyObject *kwds) {
    PYT *owner;
    PYT *partner;

    owner = PyObject_GC_New(PYT, type);
    if (owner == NULL) {
        return NULL;
    }

    /* Store the partner before testing it: dealloc reads owner->other, so
     * the field must hold a defined value (possibly NULL) before owner is
     * released on the failure path. */
    partner = PyObject_GC_New(PYT, &Pyt2Type);
    owner->other = partner;
    if (partner == NULL) {
        Py_DECREF(owner);
        return NULL;
    }

    /* Back-link: the partner takes its own reference to the owner. */
    Py_INCREF(owner);
    partner->other = owner;

    return (PyObject *) owner;
}

/*
 * Type object for "pyt.Pyt1". Only the leading slots are set statically;
 * the flags, traverse/clear hooks, member table and tp_new are assigned in
 * initpyt.
 */
static PyTypeObject Pyt1Type = {
    PyObject_HEAD_INIT(NULL)
    0,                      /* ob_size */
    "pyt.Pyt1",             /* tp_name */
    sizeof(PYT),            /* tp_basicsize */
    0,                      /* tp_itemsize */
    (destructor) dealloc    /* tp_dealloc */
};

/*
 * tp_traverse: hands the one object reference held by `self` (the `other`
 * link) to the `visit` callback.
 *
 * Returns 0, or the callback's non-zero result as propagated by Py_VISIT.
 */
static int
traverse (PYT *self, visitproc visit, void *arg)
{
    /* Py_VISIT relies on the parameters being named `visit` and `arg`. */
    Py_VISIT(self->other);

    return 0;
}

/*
 * tp_clear: drops the reference to the partner object and resets the link
 * to NULL, which breaks the cycle between the two objects.
 *
 * Always returns 0.
 */
static int
clear (PYT *self)
{
    Py_CLEAR(self->other);

    return 0;
}

static PyMemberDef members[] = {
    {"other", T_OBJECT, offsetof(PYT, other), RO, "other"},
    { NULL }
};

/* Module-level function table: no functions, only the terminating sentinel. */
static PyMethodDef methods[] = {
    { NULL, NULL, 0, NULL }
};

PyMODINIT_FUNC initpyt ( void ) {
    PyObject* m;

    Pyt1Type.tp_flags = Pyt2Type.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC;
    Pyt1Type.tp_traverse = Pyt2Type.tp_traverse = (traverseproc) traverse;
    Pyt1Type.tp_clear = Pyt2Type.tp_clear = (inquiry) clear;
    Pyt1Type.tp_members = Pyt2Type.tp_members = members;
    Pyt1Type.tp_new = new;

    if (PyType_Ready(&Pyt1Type) < 0) return;
    if (PyType_Ready(&Pyt2Type) < 0) return;

    m = Py_InitModule("pyt", methods);

    Py_INCREF(&Pyt1Type), PyModule_AddObject(m, "Pyt", (PyObject *) &Pyt1Type);
}

Using my test script

# Python 2 driver script: builds the extension, then exercises the object pair.
from distutils.core import Extension, setup
import sys, gc
# Inject the build command so that running this script compiles pyt.c in place.
sys.argv.extend(["build_ext", "-i"])
setup(ext_modules = [Extension('pyt', ['pyt.c'])])
from pyt import Pyt
pyt = Pyt()
print pyt, sys.getrefcount(pyt)
# Rebind the name to the partner object; the first object is now referenced
# from this script only through the partner's `other` link.
pyt = pyt.other
print pyt, sys.getrefcount(pyt)
# Drop the script's last name for the pair and request a collection.
del pyt
gc.collect()

I get an output like

<pyt.Pyt1 object at 0x7fbc26540138> 3
<pyt.Pyt2 object at 0x7fbc26540150> 3

The objects are not deleted at the end, since each keeps a reference to the other, creating a closed cycle. In other code I used an approach where I just kept the objects until both had a refcount of 0, which I suspect is bad practice. Now I have tried using the Garbage Collector here, but the objects are still not collected.

What is going wrong here? What did I miss?

3

There are 3 answers

0
tynn On BEST ANSWER

Ok, I finally found my problem. I didn't start tracking with PyObject_GC_Track.

Python requires some steps, when using the Garbage Collector:

  • adding Py_TPFLAGS_HAVE_GC to tp_flags
  • adding a tp_traverse function and, if needed, a tp_clear function
  • object creation with PyObject_GC_New or a similar function
  • calling PyObject_GC_Track on the fully initialised object
  • object deletion with PyObject_GC_Del or a similar function, after the deallocator has called PyObject_GC_UnTrack

So here modifying the new function will suffice.

/*
 * tp_new for Pyt1: allocates an object of `type` together with a Pyt2
 * partner, links the two so that each holds a reference to the other, and
 * registers both with the cyclic garbage collector.
 * `args` and `kwds` are not used.
 *
 * Returns the new object, or NULL if either allocation fails.
 */
static PyObject * new (PyTypeObject *type, PyObject *args, PyObject *kwds) {
    PYT *owner;
    PYT *partner;

    owner = PyObject_GC_New(PYT, type);
    if (owner == NULL) {
        return NULL;
    }

    /* Store the partner before testing it: dealloc reads owner->other, so
     * the field must hold a defined value (possibly NULL) before owner is
     * released on the failure path. */
    partner = PyObject_GC_New(PYT, &Pyt2Type);
    owner->other = partner;
    if (partner == NULL) {
        Py_DECREF(owner);
        return NULL;
    }

    /* Back-link: the partner takes its own reference to the owner. */
    Py_INCREF(owner);
    partner->other = owner;

    /* Start tracking only now that both objects are fully initialised. */
    PyObject_GC_Track((PyObject *) owner);
    PyObject_GC_Track((PyObject *) partner);

    return (PyObject *) owner;
}

With an output of

<pyt.Pyt1 object at 0x7f4904fe1398> 4
<pyt.Pyt2 object at 0x7f4904fe15c8> 4
dealloc object at 0x7f4904fe15c8
dealloc object at 0x7f4904fe1398
1
Kevin On

You can do this using weak references (see the weakref module). But it's usually better to just rely on the garbage collector. It's possible someone else will create a large reference cycle involving your objects, and then you'll be relying on the GC anyway, so you may as well use it for the simple case.

Please explain what you mean by "failed badly."

1
Dunes On

An important thing to note about (most) garbage collected languages is that deletion of an object is not guaranteed to happen as soon as the object becomes unreachable. Once an object becomes unreachable it is entirely up to the garbage collector when it will release the associated resources, which could be as late as when the program ends if there is no pressure for memory.

If you don't set __del__ methods for your linked classes then the garbage collector should work fine. It won't immediately clean up your objects, as the function to detect reference cycles is more costly than simple reference counting and as such is run infrequently.

Example using a pure python class

import gc
import weakref


class Obj(object):
    """Bare object used only to carry attributes for the cycle demo."""


# Build a two-object reference cycle: x -> y and y -> x.
x = Obj()
y = Obj()

# Two separate assignments. Writing `x.y = y, y.x = x` is parsed as the
# chained assignment `x.y = (y, y.x) = x`, which tries to unpack x and
# raises TypeError instead of creating the cycle.
x.y = y
y.x = x

# A weak reference lets us observe x's lifetime without keeping it alive.
ref = weakref.ref(x)

print(ref())    # the object is alive
del x, y
print(ref())    # normally still alive: only the cycle references it now
gc.collect()
print(ref())    # None once the collector has reclaimed the cycle

Outputs:

<__main__.Obj object at 0x7f81c8ccc7b8>
<__main__.Obj object at 0x7f81c8ccc7b8>
None