I noticed some odd inlining behavior when decompiling code that I wrote.
What I noticed is that a method will not get inlined UNLESS it is in a loop.
The weird thing is that the equivalent of this method, written in a non-generic way, always gets inlined.
The code:
using System;
using System.Runtime.CompilerServices;
using SharpLab.Runtime;
// NOTE(review): JitGeneric is supplied by SharpLab.Runtime; presumably it asks SharpLab to
// emit JIT output for the T = int instantiation of this generic type - confirm.
/// <summary>
/// Generic "less than" / "minimum" helpers for unmanaged primitive numeric types.
/// Dispatch is done by comparing <c>typeof(T)</c> against each supported primitive type.
/// </summary>
/// <typeparam name="T">
/// An unmanaged type. Only byte, sbyte, ushort, short, uint, int, ulong, long, float and
/// double are handled; any other type makes <see cref="Less"/> throw.
/// </typeparam>
[JitGeneric(typeof(int))]
public static class GenericOps<T> where T : unmanaged
{
/// <summary>
/// Returns whether <paramref name="left"/> is less than <paramref name="right"/>, using the
/// built-in <c>&lt;</c> operator of the primitive type that matches <typeparamref name="T"/>.
/// </summary>
/// <param name="left">Left-hand operand.</param>
/// <param name="right">Right-hand operand.</param>
/// <returns><c>true</c> when <paramref name="left"/> &lt; <paramref name="right"/>; otherwise <c>false</c>.</returns>
/// <exception cref="NotSupportedException">
/// <typeparamref name="T"/> is not one of the ten primitive types checked below; the message
/// is <c>typeof(T).Name</c>.
/// </exception>
// Explicitly requests inlining of this method; Min below carries no such attribute.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool Less(T left, T right)
{
// C# has no direct conversion from T to a concrete primitive, so each branch casts through
// object first. In the T = int JIT output that accompanies this code, the whole method
// reduces to a single integer compare.
if (typeof(T) == typeof(byte)) return (byte)(object)left < (byte)(object)right;
if (typeof(T) == typeof(sbyte)) return (sbyte)(object)left < (sbyte)(object)right;
if (typeof(T) == typeof(ushort)) return (ushort)(object)left < (ushort)(object)right;
if (typeof(T) == typeof(short)) return (short)(object)left < (short)(object)right;
if (typeof(T) == typeof(uint)) return (uint)(object)left < (uint)(object)right;
if (typeof(T) == typeof(int)) return (int)(object)left < (int)(object)right;
if (typeof(T) == typeof(ulong)) return (ulong)(object)left < (ulong)(object)right;
if (typeof(T) == typeof(long)) return (long)(object)left < (long)(object)right;
if (typeof(T) == typeof(float)) return (float)(object)left < (float)(object)right;
if (typeof(T) == typeof(double)) return (double)(object)left < (double)(object)right;
// Fallback for every T not listed above; the throw lives directly in this method body.
throw new NotSupportedException(typeof(T).Name);
}
/// <summary>
/// Returns <paramref name="left"/> when <see cref="Less"/> reports it as smaller than
/// <paramref name="right"/>; otherwise returns <paramref name="right"/> (including when the
/// two compare equal).
/// </summary>
/// <param name="left">First candidate.</param>
/// <param name="right">Second candidate.</param>
/// <returns>The operand selected by the <see cref="Less"/> comparison.</returns>
/// <exception cref="NotSupportedException">Propagated from <see cref="Less"/> for unsupported <typeparamref name="T"/>.</exception>
public static T Min(T left, T right)
{
return Less(left, right) ? left : right;
}
}
/// <summary>
/// Non-generic, int-only counterpart of <c>GenericOps&lt;T&gt;</c>, used as the baseline for
/// comparing JIT inlining behavior. Neither method carries a MethodImpl attribute.
/// </summary>
public static class IntOps
{
/// <summary>Returns whether <paramref name="left"/> is less than <paramref name="right"/>.</summary>
/// <param name="left">Left-hand operand.</param>
/// <param name="right">Right-hand operand.</param>
/// <returns><c>true</c> when <paramref name="left"/> &lt; <paramref name="right"/>; otherwise <c>false</c>.</returns>
public static bool Less(int left, int right)
{
return left < right;
}
/// <summary>
/// Returns <paramref name="left"/> when it is less than <paramref name="right"/>; otherwise
/// returns <paramref name="right"/>.
/// </summary>
/// <param name="left">First candidate.</param>
/// <param name="right">Second candidate.</param>
/// <returns>The smaller of the two values.</returns>
public static int Min(int left, int right)
{
return Less(left, right) ? left : right;
}
}
// NOTE(review): JitGeneric is supplied by SharpLab.Runtime; presumably it asks SharpLab to
// emit JIT output for the T = int instantiation of this generic type - confirm.
/// <summary>
/// Two call sites for <c>GenericOps&lt;T&gt;.Min</c>: one straight-line (<see cref="M2"/>) and
/// one inside a loop (<see cref="M2Loop"/>), so their JIT output can be compared.
/// </summary>
/// <typeparam name="T">An unmanaged type forwarded to <c>GenericOps&lt;T&gt;</c>.</typeparam>
[JitGeneric(typeof(int))]
public static class C<T> where T : unmanaged
{
/// <summary>Forwards directly to <c>GenericOps&lt;T&gt;.Min(a, b)</c> and returns its result.</summary>
/// <param name="a">First candidate.</param>
/// <param name="b">Second candidate.</param>
/// <returns>The value returned by <c>GenericOps&lt;T&gt;.Min</c>.</returns>
public static T M2(T a, T b)
{
return GenericOps<T>.Min(a, b);
}
/// <summary>
/// Calls <c>GenericOps&lt;T&gt;.Min(a[i], b[i])</c> for every index of <paramref name="a"/> and
/// returns the result of the last call.
/// </summary>
/// <param name="a">First span; its length determines the number of iterations.</param>
/// <param name="b">
/// Second span, indexed with the same <c>i</c> as <paramref name="a"/>, so it must be at least
/// as long as <paramref name="a"/> for every <c>b[i]</c> access to be in range.
/// </param>
/// <returns>
/// The minimum of the last element pair, or <c>default(T)</c> when <paramref name="a"/> is empty.
/// </returns>
public static T M2Loop(Span<T> a, Span<T> b)
{
T num = default;
for(int i = 0; i < a.Length; i++)
{
// Each iteration overwrites num; earlier results are not accumulated.
num = GenericOps<T>.Min(a[i], b[i]);
}
return num;
}
}
Odd behavior that I noticed:
- Inside GenericOps.Min(), GenericOps.Less() IS NOT inlined, as opposed to IntOps.Min(), where IntOps.Less() IS inlined.
- Inside C.M2(), GenericOps<T>.Min IS NOT inlined, as opposed to C.M2Loop, where GenericOps<T>.Min IS inlined.
Decompilation was done using SharpLab on the x64 platform (Core CLR v4.700.20.41105 on amd64).
Decompiled JIT:
Microsoft.CodeAnalysis.EmbeddedAttribute..ctor()
L0000: ret
System.Runtime.CompilerServices.IsUnmanagedAttribute..ctor()
L0000: ret
GenericOps`1[[System.Int32, System.Private.CoreLib]].Less(Int32, Int32)
L0000: cmp ecx, edx
L0002: setl al
L0005: movzx eax, al
L0008: ret
GenericOps`1[[System.Int32, System.Private.CoreLib]].Min(Int32, Int32)
L0000: push rdi
L0001: push rsi
L0002: sub rsp, 0x28
L0006: mov esi, ecx
L0008: mov edi, edx
L000a: mov ecx, esi
L000c: mov edx, edi
L000e: call GenericOps`1[[System.Int32, System.Private.CoreLib]].Less(Int32, Int32)
L0013: test eax, eax
L0015: jne short L0020
L0017: mov eax, edi
L0019: add rsp, 0x28
L001d: pop rsi
L001e: pop rdi
L001f: ret
L0020: mov eax, esi
L0022: add rsp, 0x28
L0026: pop rsi
L0027: pop rdi
L0028: ret
IntOps.Less(Int32, Int32)
L0000: cmp ecx, edx
L0002: setl al
L0005: movzx eax, al
L0008: ret
IntOps.Min(Int32, Int32)
L0000: cmp ecx, edx
L0002: jl short L0007
L0004: mov eax, edx
L0006: ret
L0007: mov eax, ecx
L0009: ret
C`1[[System.Int32, System.Private.CoreLib]].M2(Int32, Int32)
L0000: push rdi
L0001: push rsi
L0002: sub rsp, 0x28
L0006: mov esi, ecx
L0008: mov edi, edx
L000a: mov ecx, esi
L000c: mov edx, edi
L000e: call GenericOps`1[[System.Int32, System.Private.CoreLib]].Less(Int32, Int32)
L0013: test eax, eax
L0015: jne short L001b
L0017: mov eax, edi
L0019: jmp short L001d
L001b: mov eax, esi
L001d: add rsp, 0x28
L0021: pop rsi
L0022: pop rdi
L0023: ret
C`1[[System.Int32, System.Private.CoreLib]].M2Loop(System.Span`1<Int32>, System.Span`1<Int32>)
L0000: push rsi
L0001: sub rsp, 0x20
L0005: mov r8, [rcx]
L0008: mov ecx, [rcx+8]
L000b: xor eax, eax
L000d: xor r9d, r9d
L0010: test ecx, ecx
L0012: jle short L0046
L0014: mov r10d, [rdx+8]
L0018: mov rdx, [rdx]
L001b: movsxd rax, r9d
L001e: mov r11d, [r8+rax*4]
L0022: cmp r9d, r10d
L0025: jae short L004c
L0027: mov eax, [rdx+rax*4]
L002a: cmp r11d, eax
L002d: setl sil
L0031: movzx esi, sil
L0035: test esi, esi
L0037: jne short L003b
L0039: jmp short L003e
L003b: mov eax, r11d
L003e: inc r9d
L0041: cmp r9d, ecx
L0044: jl short L001b
L0046: add rsp, 0x20
L004a: pop rsi
L004b: ret
L004c: call 0x00007ffbf05bfc60
L0051: int3
Can anyone offer an explanation for this weird behavior?
And is there a way to guarantee that GenericOps.Less() and GenericOps.Min() will always get inlined?
Your first point is easy to explain.
throw statements prevent inlining. Refer to the .NET source code to see how it moves exception throwing out of the main method body by using a ThrowHelper class. Move the throw statement to some other method to increase your chances of inlining. Nothing ever guarantees it.
As for the other point, I am guessing the JIT determines that the call overhead will be large enough that inlining within the loop body will more than offset the increased code size.