0

I do a lot of hashing in hot path; these hashing function looks something like this: f(input) mod size. I do not know size beforehand. When tested, I got a significant speedup from compiler optimization when the size was known during compile. I would like to get such a speedup also for my situation.

I solved that with expression trees, but I got another idea of how to solve the problem. The solution, sadly, works only partially. I show an example of dividing.

public interface IConstant
{
    long GetValue();
}

public struct Two: IConstant
{
    public long GetValue()
    {
        return 2;
    }
}

interface IDivisor
{
    long Divide(long[] x);
}

class DivideBy<TDivisor> : IDivisor
    where TDivisor: struct, IConstant
{
    readonly static TDivisor _divisor = default(TDivisor);
    public long Divide(long[] data)

    {
        long answer = 0;
        for (int i = 0; i < data.Length; i++)
        {
            answer += data[i] / _divisor.GetValue();

        }
        return answer;
    }
}

Divisor<Two> divisor = default(Divisor<Two>);
divide.Divide(data)

This code performs the same as if the divisor were known in advance. When I tried runtime types, I got no optimization.

public class RuntimeTypeCreator<T>
    where T : struct
{

    static IDictionary<long, Type> cache = new Dictionary<long, Type>();

    public static Type Get(long value)
    {
        if (cache.TryGetValue(value, out var type))
        {
            return type;
        }
        type = CreateRuntimeConstant(value);
        cache[value] = type;
        return type;
    }
    public static Type CreateRuntimeConstant(long value)
    {
        // Define a dynamic assembly and module
        AssemblyName assemblyName = new AssemblyName("DynamicAssembly");
        AssemblyBuilder assemblyBuilder = AssemblyBuilder.DefineDynamicAssembly(assemblyName, AssemblyBuilderAccess.Run);
        ModuleBuilder moduleBuilder = assemblyBuilder.DefineDynamicModule("DynamicModule");

        // Define a public struct named "DynamicType"
        TypeBuilder tb = moduleBuilder.DefineType($"long_{value}",
            TypeAttributes.Public | TypeAttributes.Sealed | TypeAttributes.SequentialLayout | TypeAttributes.BeforeFieldInit,
            typeof(ValueType),
            new Type[] { typeof(IConstant) });

        // Define the get_Value method to implement the IConstant interface
        var method = tb.DefineMethod("GetValue", MethodAttributes.Public | MethodAttributes.Virtual, typeof(long), Type.EmptyTypes);
        var il = method.GetILGenerator();
        il.Emit(OpCodes.Ldc_I8, value);
        il.Emit(OpCodes.Ret);
        tb.DefineMethodOverride(method, typeof(IConstant).GetMethod("GetValue"));

        // Create the type
        return tb.CreateType();
    }
}

var type = RuntimeTypeCreator<int>.CreateRuntimeConstant(2);
divisor = (IDivisor)Activator.CreateInstance(typeof(DivideBy<>).MakeGenericType(type));
divisor.Divide(data);

I think the problem is in declaring the method GetValue as virtual, but without the declaration the the code fails with this error: System.TypeLoadException: Method 'GetValue' on type 'long_2' from assembly 'DynamicAssembly, Version=0.0.0.0, Culture=neutral, PublicKeyToken=null' must be virtual to implement a method on an interface or super type.

Bechmarks

public class Benchmark
{

    long[] data;
    IDivisor divisor;
    IDivisor divisorExactType = new DivideBy<Two>();

    [GlobalSetup]
    public void Setup()
    {
        data = new long[100000];
        Random random = new Random();
        for (int i = 0; i < data.Length; i++)
        {
            data[i] = random.NextInt64();
        }

        var type = RuntimeTypeCreator<int>.CreateRuntimeConstant(2);
        divisor = (IDivisor)Activator.CreateInstance(typeof(DivideBy<>).MakeGenericType(type));
    }


    [Benchmark]
    public long DivideRuntimeType()
    {
        return divisor.GetValue(data);
    }

    [Benchmark]
    public long DivideExactType()
    {
        return divisorExactType.GetValue(data);
    }


    [Benchmark]
    [Arguments(2)]
    public long DivideExact(long x)
    {
        long answer = 0;
        for (int i = 0; i < data.Length; i++)
        {
            answer += data[i] / 2;

        }
        return answer;

    }
}

Results

| Method            | x | Mean        | Error     | StdDev     |
|------------------ |-- |------------:|----------:|-----------:|
| DivideRuntimeType | ? | 1,099.36 us | 37.430 us | 106.789 us |
| DivideExactType   | ? |    75.05 us |  1.498 us |   2.100 us |
| DivideExact       | 2 |    92.11 us |  1.829 us |   3.156 us |

Whole code:

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System.Reflection;
using System.Reflection.Emit;


public interface IConstant
{
    long GetValue();
}

public struct Two : IConstant
{
    public long GetValue()
    {
        return 2;
    }
}
public class RuntimeTypeCreator<T>
    where T : struct
{

    static IDictionary<long, Type> cache = new Dictionary<long, Type>();

    public static Type Get(long value)
    {
        if (cache.TryGetValue(value, out var type))
        {
            return type;
        }
        type = CreateRuntimeConstant(value);
        cache[value] = type;
        return type;
    }
    public static Type CreateRuntimeConstant(long value)
    {
        // Define a dynamic assembly and module
        AssemblyName assemblyName = new AssemblyName("DynamicAssembly");
        AssemblyBuilder assemblyBuilder = AssemblyBuilder.DefineDynamicAssembly(assemblyName, AssemblyBuilderAccess.Run);
        ModuleBuilder moduleBuilder = assemblyBuilder.DefineDynamicModule("DynamicModule");

        // Define a public struct named "DynamicType"
        TypeBuilder tb = moduleBuilder.DefineType($"long_{value}",
            TypeAttributes.Public | TypeAttributes.Sealed | TypeAttributes.SequentialLayout | TypeAttributes.BeforeFieldInit,
            typeof(ValueType),
            new Type[] { typeof(IConstant) });

        // Define the get_Value method to implement the IConstant interface
        var method = tb.DefineMethod("GetValue", MethodAttributes.Public | MethodAttributes.Virtual, typeof(long), Type.EmptyTypes);
        var il = method.GetILGenerator();
        il.Emit(OpCodes.Ldc_I8, value);
        il.Emit(OpCodes.Ret);
        tb.DefineMethodOverride(method, typeof(IConstant).GetMethod("GetValue"));

        // Create the type
        return tb.CreateType();
    }

}


public class Divide<TA, TB> : IConstant
where TA : IConstant
where TB : IConstant
{

    public long GetValue()
    {
        TA a = default(TA);
        TB b = default(TB);
        return a.GetValue() / b.GetValue();
    }
}

public class Benchmark
{

    long[] data;
    IDivisor divisor;
    IDivisor divisorExactType = new DivideBy<Two>();

    [GlobalSetup]
    public void Setup()
    {
        data = new long[100000];
        Random random = new Random();
        for (int i = 0; i < data.Length; i++)
        {
            data[i] = random.NextInt64();
        }

        var type = RuntimeTypeCreator<int>.CreateRuntimeConstant(2);
        divisor = (IDivisor)Activator.CreateInstance(typeof(DivideBy<>).MakeGenericType(type));
    }


    [Benchmark]
    public long DivideRuntimeType()
    {
        return divisor.GetValue(data);
    }

    [Benchmark]
    public long DivideExactType()
    {
        return divisorExactType.GetValue(data);
    }


    [Benchmark]
    public long DivideByNumber()
    {
        long answer = 0;
        for (int i = 0; i < data.Length; i++)
        {
            answer += data[i] / 2;

        }
        return answer;

    }
}

public interface IDivisor
{
    long GetValue(long[] x);
}

public struct DivideBy<TDivisor> : IDivisor
        where TDivisor : struct, IConstant
{
    readonly static TDivisor _divisor = default(TDivisor);
    public long GetValue(long[] data)

    {
        long answer = 0;
        for (int i = 0; i < data.Length; i++)
        {
            answer += data[i] / _divisor.GetValue();

        }
        return answer;
    }
}


public class Program
{
    public static void Main()
    {
        var summary = BenchmarkRunner.Run<Benchmark>();
    }
}
4
  • Please show how you benchmark your code. And ideally add full minimal reproducible example so no guesswork is needed. Commented May 26, 2024 at 20:39
  • 1
    Also in terms of (micro)optimizations I would suggest to try looking into moving _divisor.GetValue() outside the loop, so no need to guess if compiler/jitter will correctly optimize the call. Commented May 26, 2024 at 20:41
  • Can you make the size a power of two? You can optimize the modulo a lot in that case by using & (but that will not "mix" the value up the way modulo usually does, so you may need to strengthen your hash .. bad as that sounds, it can be cheaper than a division) Commented May 26, 2024 at 23:48
  • No, that´s not an option, as this increases cash misses by having an unnecessarily large table. The optimized division is nearly as fast as &. Commented May 27, 2024 at 7:49

0

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.