Why does VarHandle::getAcquire have so much overhead?

145 views Asked by At

In C/C++ stdatomics on x86 provides release-acquire semantic which is free due to x86 memory model. So the point of using atomic_load_explicit(&i, memory_order_acquire); and atomic_store_explicit(&i, 0, memory_order_release); is that it does not require any memory barrier instruction to ensure correct release-acquire semantics on x86.

In Java there's also VarHandle::getAcquire and VarHandle::setRelease which roughly speaking provides the same memory semantic. The problem is when trying to measure VarHandle::getAcquire I figured out that it introduces a lot of overhead which essentially destroys the whole point of the optimization. Here is the benchmark:

@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(value = 1)
@BenchmarkMode(org.openjdk.jmh.annotations.Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
public class Benchmarks {

    private int variable = 0;

    private static VarHandle VAR_HANDLE;
    static {
        try {
            MethodHandles.Lookup l = MethodHandles.lookup();
            VAR_HANDLE = l.findVarHandle(Benchmarks.class, "variable", int.class);
        } catch (ReflectiveOperationException e) {
            throw new Error(e);
        }
    }

    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    public void readAcquire(Blackhole bh) {
        bh.consume(VAR_HANDLE.getAcquire(this));
    }
}

so prof perfasm shows pretty much lot of noise like:

mov     0x20(%r12,%r8,8),%r9d  ;*aaload {reexecute=0 rethrow=0 return_oop=0}
                           ; - java.lang.invoke.VarHandle::getMethodHandle@10 (line 1979)
                           ; - java.lang.invoke.VarHandleGuards::guard_L_L@50 (line 40)
                           ; - com.test.Benchmarks::readAcquire@5 (line 33)
                           ; - com.test.jmh_generated.Benchmarks_readAcquire_jmhTest::readAcquire_avgt_jmhStub@17 (line 190)
 mov     0x10(%r12,%r9,8),%ecx  ;*getfield type {reexecute=0 rethrow=0 return_oop=0}
                           ; - java.lang.invoke.MethodHandle::asType@2 (line 839)
                           ; - java.lang.invoke.VarHandleGuards::guard_L_L@59 (line 41)
                           ; - com.test.Benchmarks::readAcquire@5 (line 33)
                           ; - com.test.jmh_generated.Benchmarks_readAcquire_jmhTest::readAcquire_avgt_jmhStub@17 (line 190)
                           ; implicit exception: dispatches to 0x00007f50282ef40c
 cmp     $0xe2d7411d,%ecx  ;   {oop(a 'java/lang/invoke/MethodType'{0x0000000716ba08e8} = (Ljava/lang/invoke/VarHandle;Lcom/test/Benchmarks;)Ljava/lang/Object;)}
 je      0x7f50282ef318    ;*if_acmpne {reexecute=0 rethrow=0 return_oop=0}
                           ; - java.lang.invoke.MethodHandle::asType@5 (line 839)
                           ; - java.lang.invoke.VarHandleGuards::guard_L_L@59 (line 41)
                           ; - com.test.Benchmarks::readAcquire@5 (line 33)
                           ; - com.test.jmh_generated.Benchmarks_readAcquire_jmhTest::readAcquire_avgt_jmhStub@17 (line 190)
 mov     0x18(%r12,%r9,8),%r10d  ;*getfield asTypeCache {reexecute=0 rethrow=0 return_oop=0}
                           ; - java.lang.invoke.MethodHandle::asTypeCached@1 (line 851)
                           ; - java.lang.invoke.MethodHandle::asType@12 (line 843)
                           ; - java.lang.invoke.VarHandleGuards::guard_L_L@59 (line 41)
                           ; - com.test.Benchmarks::readAcquire@5 (line 33)
                           ; - com.test.jmh_generated.Benchmarks_readAcquire_jmhTest::readAcquire_avgt_jmhStub@17 (line 190)
 mov     0x10(%r12,%r10,8),%r8d  ;*getfield type {reexecute=0 rethrow=0 return_oop=0}
                           ; - java.lang.invoke.MethodHandle::asTypeCached@11 (line 852)
                           ; - java.lang.invoke.MethodHandle::asType@12 (line 843)
                           ; - java.lang.invoke.VarHandleGuards::guard_L_L@59 (line 41)
                           ; - com.test.Benchmarks::readAcquire@5 (line 33)
                           ; - com.test.jmh_generated.Benchmarks_readAcquire_jmhTest::readAcquire_avgt_jmhStub@17 (line 190)
                           ; implicit exception: dispatches to 0x00007f50282ef428

 cmp     $0xe2d7411d,%r8d  ;   {oop(a 'java/lang/invoke/MethodType'{0x0000000716ba08e8} = (Ljava/lang/invoke/VarHandle;Lcom/test/Benchmarks;)Ljava/lang/Object;)}
 jne     0x7f50282ef2e4    ;*if_acmpne {reexecute=0 rethrow=0 return_oop=0}
                           ; - java.lang.invoke.MethodHandle::asTypeCached@14 (line 852)
                           ; - java.lang.invoke.MethodHandle::asType@12 (line 843)
                           ; - java.lang.invoke.VarHandleGuards::guard_L_L@59 (line 41)
                           ; - com.test.Benchmarks::readAcquire@5 (line 33)
                           ; - com.test.jmh_generated.Benchmarks_readAcquire_jmhTest::readAcquire_avgt_jmhStub@17 (line 190)
 lea     (%r12,%r11,8),%rdx  ;*getstatic VAR_HANDLE {reexecute=0 rethrow=0 return_oop=0}
                           ; - com.test.Benchmarks::readAcquire@1 (line 33)
                           ; - com.test.jmh_generated.Benchmarks_readAcquire_jmhTest::readAcquire_avgt_jmhStub@17 (line 190)
 lea     (%r12,%r10,8),%rsi  ;*getfield asTypeCache {reexecute=0 rethrow=0 return_oop=0}
                           ; - java.lang.invoke.MethodHandle::asTypeCached@1 (line 851)
                           ; - java.lang.invoke.MethodHandle::asType@12 (line 843)
                           ; - java.lang.invoke.VarHandleGuards::guard_L_L@59 (line 41)
                           ; - com.test.Benchmarks::readAcquire@5 (line 33)
                           ; - com.test.jmh_generated.Benchmarks_readAcquire_jmhTest::readAcquire_avgt_jmhStub@17 (line 190)

or

callq   0x7fa21883fd80
  ;*invokevirtual invokeBasic {reexecute=0 rethrow=0 return_oop=1}
  ;java.lang.invoke.VarHandleGuards::guard_L_L@64 (line 41)
  ;com.test.Benchmarks::readAcquire@5 (line 32)
  ;com.test.jmh_generated.Benchmarks_readAcquire_jmhTest::readAcquire_avgt_jmhStub@17 (line 190)

where the VarHandleGuard::guard__L looks as

static final Object guard__L(VarHandle handle, VarHandle.AccessDescriptor ad) throws Throwable {
    handle.checkExactAccessMode(ad);
    if (handle.isDirect() && handle.vform.methodType_table[ad.type] == ad.symbolicMethodTypeErased) {
        Object r = MethodHandle.linkToStatic(handle, handle.vform.getMemberName(ad.mode));
        return ad.returnType.cast(r);
    } else {
        MethodHandle mh = handle.getMethodHandle(ad.mode);
        return mh.asType(ad.symbolicMethodTypeInvoker).invokeBasic(handle.asDirect());
    }
}

which seems to be called implicitly on any VarHandle invocation.


In Java, volatile provides sequential-consistency guarantees, and essentially reading a volatile variable does not require extra memory barrier on x86 (unlike other ISAs), but writing to a volatile variable usually compiles to an extra lock addl $0x0, (%rsp) inserted after memory write instruction. (C++ compilers use xchg instead of mov for seq_cst stores, instead of a separate mfence or locked instruction. Slightly cheaper, but the full barrier is the main cost.)

Why does VarHandle::getAcquire introduce so much noise and is it possible to use it instead of volatile for optimizing for latency?

0

There are 0 answers