Core ML's predictionFromFeatures: method allocates a new output buffer each time it runs inference. Is there any way I can allocate a buffer beforehand and have the model write its result into that, instead of creating a new one on every call?
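Ideally I'd be able to do something along these lines. (This is only a sketch of the behaviour I'm after, using the model and inFeatures from the listing below: I noticed MLPredictionOptions has an outputBackings dictionary, but I can't tell from the docs whether Core ML is guaranteed to write into the supplied array rather than allocating its own and copying. The output name "C" is just a placeholder for whatever my model's output is actually called.)

NSError *error = nil;
// Pre-allocate the output buffer once, outside the timed loop.
MLMultiArray *outBuffer = [[MLMultiArray alloc] initWithShape:@[@3000, @3000]
                                                     dataType:MLMultiArrayDataTypeFloat
                                                        error:&error];
MLPredictionOptions *options = [MLPredictionOptions new];
// "C" is a placeholder for the model's output feature name.
options.outputBackings = @{ @"C": outBuffer };
// Hoping this writes into outBuffer instead of creating a fresh array each call.
id<MLFeatureProvider> outFeatures = [model predictionFromFeatures:inFeatures
                                                          options:options
                                                            error:&error];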
Thanks!
Code:
#include <iostream>
#include <ctime>
#include <stdexcept>
#include <CoreML/CoreML.h>

// Throw if a Core ML call reported an error.
void handle_errors(NSError *error) {
    if (error != nil) {
        NSString *formatted = [NSString stringWithFormat:@"%@", [error userInfo]];
        throw std::runtime_error([formatted UTF8String]);
    }
}

// Allocate an N x N float MLMultiArray and fill it with a constant value.
MLMultiArray * create_array() {
    NSError *error = nil;
    const int N = 3000;
    // Create a 2D multiarray with shape N x N (3000 x 3000).
    NSArray<NSNumber *> *shape = @[@(N), @(N)];
    MLMultiArray *multiarray = [[MLMultiArray alloc] initWithShape:shape
                                                          dataType:MLMultiArrayDataTypeFloat
                                                             error:&error];
    handle_errors(error);
    // Fill every element. Keyed subscripting boxes each index in an NSNumber,
    // so this is slow for 9 million elements, but it only runs once per input.
    for (int x = 0; x < N; x++) {
        for (int y = 0; y < N; y++) {
            [multiarray setObject:@3.14159
                forKeyedSubscript:@[@(x), @(y)]];
        }
    }
    return multiarray;
}
int main() {
    @autoreleasepool {
        NSError *error = nil;

        // Compile the .mlpackage and load it with all compute units enabled.
        NSURL *specUrl = [NSURL fileURLWithPath:@"../model.mlpackage"];
        NSURL *compiledUrl = [MLModel compileModelAtURL:specUrl error:&error];
        handle_errors(error);
        MLModelConfiguration *configuration = [MLModelConfiguration new];
        configuration.computeUnits = MLComputeUnitsAll;
        MLModel *model = [MLModel modelWithContentsOfURL:compiledUrl
                                           configuration:configuration
                                                   error:&error];
        handle_errors(error);

        // Build the inputs and the feature provider once, outside the timed loop.
        MLMultiArray *A = create_array();
        MLMultiArray *B = create_array();
        NSDictionary<NSString *, id> *featureDictionary = @{ @"A": A, @"B": B };
        MLDictionaryFeatureProvider *inFeatures =
            [[MLDictionaryFeatureProvider alloc] initWithDictionary:featureDictionary
                                                              error:&error];
        handle_errors(error);

        // Time 100 predictions and report the average.
        double total_time = 0;
        for (int i = 0; i < 100; i++) {
            clock_t start = clock();
            id<MLFeatureProvider> outFeatures =
                [model predictionFromFeatures:static_cast<MLDictionaryFeatureProvider * _Nonnull>(inFeatures)
                                        error:&error];
            clock_t end = clock();
            handle_errors(error);
            (void)outFeatures; // output is discarded; only the timing matters
            total_time += ((double)(end - start)) / CLOCKS_PER_SEC;
        }
        std::cout << "Average prediction time (s): " << total_time / 100 << std::endl;
    }
}
I've searched the web for help, including the Apple docs, without luck. My use case: I want a small, fast program that runs just one operation (such as a softmax or a matrix multiplication) through Core ML so I can profile it against a pure CPU-based C implementation, and I don't want allocation time included in my measurements.
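For reference, the CPU baseline I have in mind is just a separate C program with a naive loop, timed the same way and with all buffers allocated outside the timed region. This is only a sketch (a softmax here; the size and fill value are made up to roughly mirror the inputs above):

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Naive single-threaded softmax over N floats. Buffers are allocated and
   filled once, outside the timed region, to match the Core ML benchmark. */
int main(void) {
    const int N = 3000 * 3000;
    float *in  = (float *)malloc(N * sizeof *in);
    float *out = (float *)malloc(N * sizeof *out);
    for (int i = 0; i < N; i++) in[i] = 3.14159f;

    clock_t start = clock();
    // Standard numerically stable softmax: subtract the max, exponentiate, normalize.
    float max = in[0];
    for (int i = 1; i < N; i++) if (in[i] > max) max = in[i];
    float sum = 0.0f;
    for (int i = 0; i < N; i++) { out[i] = expf(in[i] - max); sum += out[i]; }
    for (int i = 0; i < N; i++) out[i] /= sum;
    clock_t end = clock();

    printf("CPU softmax time: %f s\n", (double)(end - start) / CLOCKS_PER_SEC);
    free(in);
    free(out);
    return 0;
}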