C# Weird Method Inlining Behavior

153 views Asked by At

I noticed an odd inlining behavior when decompiling some code that I wrote.
What I noticed is that a method will not get inlined UNLESS it is called inside a loop.
The weird thing is that the equivalent of this method, written in a non-generic way, always gets inlined.

The code:

using System;
using System.Runtime.CompilerServices;
using SharpLab.Runtime;

// SharpLab.Runtime attribute; presumably asks SharpLab to emit JIT output for the T = int instantiation.
[JitGeneric(typeof(int))]
/// <summary>
/// Generic comparison helpers over unmanaged primitive numeric types.
/// </summary>
/// <typeparam name="T">Element type; only the ten primitives tested in <see cref="Less"/> are supported.</typeparam>
public static class GenericOps<T> where T : unmanaged
{
    /// <summary>
    /// Returns <c>true</c> when <paramref name="left"/> is strictly less than <paramref name="right"/>.
    /// </summary>
    /// <remarks>
    /// Dispatches on <c>typeof(T)</c> and casts each operand through <c>object</c> to the concrete
    /// primitive so that the primitive's built-in <c>&lt;</c> operator can be used.
    /// Marked <see cref="MethodImplOptions.AggressiveInlining"/>.
    /// </remarks>
    /// <exception cref="NotSupportedException">
    /// <typeparamref name="T"/> is none of byte, sbyte, ushort, short, uint, int, ulong, long, float, double.
    /// </exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool Less(T left, T right)
    {
        // In the Int32 JIT output shown in this post, this whole chain collapses to a single cmp/setl.
        if (typeof(T) == typeof(byte)) return (byte)(object)left < (byte)(object)right;
        if (typeof(T) == typeof(sbyte)) return (sbyte)(object)left < (sbyte)(object)right;
        if (typeof(T) == typeof(ushort)) return (ushort)(object)left < (ushort)(object)right;
        if (typeof(T) == typeof(short)) return (short)(object)left < (short)(object)right;
        if (typeof(T) == typeof(uint)) return (uint)(object)left < (uint)(object)right;
        if (typeof(T) == typeof(int)) return (int)(object)left < (int)(object)right;
        if (typeof(T) == typeof(ulong)) return (ulong)(object)left < (ulong)(object)right;
        if (typeof(T) == typeof(long)) return (long)(object)left < (long)(object)right;
        if (typeof(T) == typeof(float)) return (float)(object)left < (float)(object)right;
        if (typeof(T) == typeof(double)) return (double)(object)left < (double)(object)right;

        // Fallthrough: T matched none of the supported primitives.
        throw new NotSupportedException(typeof(T).Name);
    }

    /// <summary>
    /// Returns <paramref name="left"/> when it is less than <paramref name="right"/>; otherwise returns
    /// <paramref name="right"/> (including when <see cref="Less"/> reports <c>false</c> for equal values).
    /// </summary>
    /// <remarks>Unlike <see cref="Less"/>, this method carries no inlining attribute.</remarks>
    public static T Min(T left, T right)
    {
        return Less(left, right) ? left : right;
    }
}


/// <summary>
/// Non-generic <c>int</c> counterpart of <c>GenericOps&lt;T&gt;</c>, used as the comparison baseline.
/// Neither method carries an inlining attribute.
/// </summary>
public static class IntOps
{    
    /// <summary>
    /// Returns <c>true</c> when <paramref name="left"/> is strictly less than <paramref name="right"/>.
    /// </summary>
    public static bool Less(int left, int right)
    {
        return left < right;
    }

    /// <summary>
    /// Returns <paramref name="left"/> when it is less than <paramref name="right"/>; otherwise <paramref name="right"/>.
    /// </summary>
    public static int Min(int left, int right)
    {
        return Less(left, right) ? left : right;
    }
}

// SharpLab.Runtime attribute; presumably asks SharpLab to emit JIT output for the T = int instantiation.
[JitGeneric(typeof(int))]
/// <summary>
/// Call sites that exercise <c>GenericOps&lt;T&gt;.Min</c> once directly and once from inside a loop.
/// </summary>
public static class C<T> where T : unmanaged
{      
    /// <summary>
    /// Forwards to <c>GenericOps&lt;T&gt;.Min</c> with a single, non-looped call.
    /// </summary>
    public static T M2(T a, T b)
    {
        return GenericOps<T>.Min(a, b);
    }
    
    /// <summary>
    /// Calls <c>GenericOps&lt;T&gt;.Min</c> on each pair <c>a[i]</c>, <c>b[i]</c> and returns the result of the
    /// last call, or <c>default</c> when <paramref name="a"/> is empty.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the loop is bounded by <c>a.Length</c> only, so <paramref name="b"/> is assumed to be
    /// at least as long as <paramref name="a"/>.
    /// </remarks>
    public static T M2Loop(Span<T> a, Span<T> b)
    {
        T num = default;
        
        for(int i = 0; i < a.Length; i++)
        {
            // Overwrites num each iteration; earlier results are not accumulated.
            num = GenericOps<T>.Min(a[i], b[i]);
        }
        
        return num;
    }
}

Odd behavior that I noticed:

  • Inside GenericOps<T>.Min(), GenericOps<T>.Less() IS NOT inlined, as opposed to IntOps.Min(), where IntOps.Less() IS inlined.
  • Inside C<T>.M2(), GenericOps<T>.Min() IS NOT inlined, as opposed to C<T>.M2Loop(), where GenericOps<T>.Min() IS inlined.

Decompilation was done using SharpLab on X64 platform, Core CLR v4.700.20.41105 on amd64

Decompiled JIT:

Microsoft.CodeAnalysis.EmbeddedAttribute..ctor()
    L0000: ret

System.Runtime.CompilerServices.IsUnmanagedAttribute..ctor()
    L0000: ret

GenericOps`1[[System.Int32, System.Private.CoreLib]].Less(Int32, Int32)
    L0000: cmp ecx, edx
    L0002: setl al
    L0005: movzx eax, al
    L0008: ret

GenericOps`1[[System.Int32, System.Private.CoreLib]].Min(Int32, Int32)
    L0000: push rdi
    L0001: push rsi
    L0002: sub rsp, 0x28
    L0006: mov esi, ecx
    L0008: mov edi, edx
    L000a: mov ecx, esi
    L000c: mov edx, edi
    L000e: call GenericOps`1[[System.Int32, System.Private.CoreLib]].Less(Int32, Int32)
    L0013: test eax, eax
    L0015: jne short L0020
    L0017: mov eax, edi
    L0019: add rsp, 0x28
    L001d: pop rsi
    L001e: pop rdi
    L001f: ret
    L0020: mov eax, esi
    L0022: add rsp, 0x28
    L0026: pop rsi
    L0027: pop rdi
    L0028: ret

IntOps.Less(Int32, Int32)
    L0000: cmp ecx, edx
    L0002: setl al
    L0005: movzx eax, al
    L0008: ret

IntOps.Min(Int32, Int32)
    L0000: cmp ecx, edx
    L0002: jl short L0007
    L0004: mov eax, edx
    L0006: ret
    L0007: mov eax, ecx
    L0009: ret
    
C`1[[System.Int32, System.Private.CoreLib]].M2(Int32, Int32)
    L0000: push rdi
    L0001: push rsi
    L0002: sub rsp, 0x28
    L0006: mov esi, ecx
    L0008: mov edi, edx
    L000a: mov ecx, esi
    L000c: mov edx, edi
    L000e: call GenericOps`1[[System.Int32, System.Private.CoreLib]].Less(Int32, Int32)
    L0013: test eax, eax
    L0015: jne short L001b
    L0017: mov eax, edi
    L0019: jmp short L001d
    L001b: mov eax, esi
    L001d: add rsp, 0x28
    L0021: pop rsi
    L0022: pop rdi
    L0023: ret
    
C`1[[System.Int32, System.Private.CoreLib]].M2Loop(System.Span`1<Int32>, System.Span`1<Int32>)
    L0000: push rsi
    L0001: sub rsp, 0x20
    L0005: mov r8, [rcx]
    L0008: mov ecx, [rcx+8]
    L000b: xor eax, eax
    L000d: xor r9d, r9d
    L0010: test ecx, ecx
    L0012: jle short L0046
    L0014: mov r10d, [rdx+8]
    L0018: mov rdx, [rdx]
    L001b: movsxd rax, r9d
    L001e: mov r11d, [r8+rax*4]
    L0022: cmp r9d, r10d
    L0025: jae short L004c
    L0027: mov eax, [rdx+rax*4]
    L002a: cmp r11d, eax
    L002d: setl sil
    L0031: movzx esi, sil
    L0035: test esi, esi
    L0037: jne short L003b
    L0039: jmp short L003e
    L003b: mov eax, r11d
    L003e: inc r9d
    L0041: cmp r9d, ecx
    L0044: jl short L001b
    L0046: add rsp, 0x20
    L004a: pop rsi
    L004b: ret
    L004c: call 0x00007ffbf05bfc60
    L0051: int3

Can anyone offer an explanation for this weird behavior?
And is there a way to guarantee that GenericOps.Less() and GenericOps.Min() will always get inlined?

1

There is 1 answer

1
Tanveer Badar On

Your first point is easy to explain: throw statements prevent inlining. Refer to the .NET source code to see how it moves exception throwing out of the main method body by using a ThrowHelper class. Move the throw statement into a separate method to increase your chances of inlining. Nothing ever guarantees it, though.

As for the other point, I am guessing the JIT determines that the call overhead will be large enough that inlining within the loop body will more than offset the increased code size.