I converted a toy PyTorch regression model to a Core ML mlmodel using coremltools and made it updatable with a mean squared error loss (set_mean_squared_error_loss). But when I test the training, context.metrics[.lossValue] can give negative values and is very different from the loss I compute myself, as shown in the screenshot below. Am I extracting the training loss the wrong way? Does context.metrics[.lossValue] really report MSE when the loss is set with the coremltools function set_mean_squared_error_loss? Any suggestion is appreciated.
I am using coremltools==7.0 and Xcode 15.0.1.
let progressHandlers = MLUpdateProgressHandlers(forEvents: [.trainingBegin, .epochEnd],
    progressHandler: { context in
        switch context.event {
        case .trainingBegin:
            print("Training began.")
        case .epochEnd:
            let loss = context.metrics[.lossValue] as! Double
            lossValues.append(loss)
            let validationLoss = computeValidationLoss(model: context.model, validationData: validationData)
            validationLossValues.append(validationLoss)
            print("Epoch \(context.metrics[.epochIndex]!) ended. Training Loss: \(loss), Validation Loss: \(validationLoss)")
            print("\(context.metrics[.lossValue]!)")
        default:
            break
        }
    },
    completionHandler: { context in
        if let error = context.task.error {
            print("Update task failed with error: \(error)")
        } else {
            print("Update task completed")
        }
    }
)
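For reference, here is a minimal sketch (the name debugHandlers is just illustrative, and it uses only standard Core ML APIs) that also listens for .miniBatchEnd and reads the metrics with conditional casts instead of a force cast:

let debugHandlers = MLUpdateProgressHandlers(forEvents: [.miniBatchEnd, .epochEnd],
    progressHandler: { context in
        switch context.event {
        case .miniBatchEnd:
            // Loss reported after each mini-batch; shows whether a single batch already goes negative.
            let batchIndex = context.metrics[.miniBatchIndex] as? Int ?? -1
            let batchLoss = context.metrics[.lossValue] as? Double ?? .nan
            print("Mini-batch \(batchIndex) loss: \(batchLoss)")
        case .epochEnd:
            // Conditional casts avoid a crash if a metric is missing.
            let epoch = context.metrics[.epochIndex] as? Int ?? -1
            let epochLoss = context.metrics[.lossValue] as? Double ?? .nan
            print("Epoch \(epoch) loss: \(epochLoss)")
        default:
            break
        }
    },
    completionHandler: { _ in }
)

Since MSE is non-negative by definition, a negative value showing up here would already confirm that lossValue is not the same quantity as the MSE computed by hand.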
Here is my code to convert the PyTorch model to an updatable Core ML model:
import torch
import torch.optim as optim
import torch.nn as nn
import coremltools as ct

# Define a simple neural network with two layers
class SimpleRegressionModel(nn.Module):
    def __init__(self):
        super(SimpleRegressionModel, self).__init__()
        self.layer1 = nn.Linear(2, 5)  # 2 inputs, 5 outputs
        self.layer2 = nn.Linear(5, 1)  # 5 inputs, 1 output

    def forward(self, x):
        x = torch.relu(self.layer1(x))
        x = self.layer2(x)
        return x

# Create the model
model = SimpleRegressionModel()

# Create a sample input tensor
sample_input = torch.rand(1, 2)  # Adjust the shape according to your model's input

# Trace the model with a sample input
traced_model = torch.jit.trace(model, sample_input)

# Convert the traced model to Core ML format
input_features = [ct.TensorType(shape=(1, 2))]
output_features = ["output"]

mlmodel = ct.convert(
    traced_model,
    inputs=input_features,
    convert_to="neuralnetwork"
)

mlmodel.save("regression.mlmodel")
import coremltools
from coremltools.models.neural_network import NeuralNetworkBuilder, SgdParams, AdamParams
from coremltools.models import datatypes
# Load the model specification
spec = coremltools.utils.load_spec('regression.mlmodel')
builder = NeuralNetworkBuilder(spec=spec)
builder.inspect_output_features() # Name: linear_1
# Make layers updatable
builder.make_updatable(['linear_0', 'linear_1'])
# Manually add a mean squared error loss layer
feature = ('linear_1', datatypes.Array(1))
builder.set_mean_squared_error_loss(name='lossLayer', input_feature=feature)
# Define the optimizer (SGD in this example)
# sgd_params = SgdParams(lr=0.001, batch=16) # Adjust learning rate and batch size as needed
# builder.set_sgd_optimizer(sgd_params)
# define the optimizer (Adam in this example)
adam_params = AdamParams(lr=0.01, beta1=0.9, beta2=0.999, eps=1e-8, batch=16)
builder.set_adam_optimizer(adam_params)
# Set the number of epochs
builder.set_epochs(100)
# Optionally, set descriptions for your training inputs
spec.description.trainingInput[0].shortDescription = 'Input data'
spec.description.trainingInput[1].shortDescription = 'Target output data'
# Save the updated model
updated_model = coremltools.models.MLModel(spec)
updated_model.save('updatable_regression30.mlmodel')
Here is the code I use to try to update the saved updatable_regression30.mlmodel:
import CoreML
import GameKit

func generateSampleData(numSamples: Int, seed: UInt64) -> ([MLMultiArray], [MLMultiArray]) {
    var inputArray = [MLMultiArray]()
    var outputArray = [MLMultiArray]()

    // Create a random number generator with a fixed seed
    let randomSource = GKLinearCongruentialRandomSource(seed: seed)
    let randomDistribution = GKRandomDistribution(randomSource: randomSource, lowestValue: 0, highestValue: 1000)

    for _ in 0..<numSamples {
        do {
            let input = try MLMultiArray(shape: [1, 2], dataType: .float32)
            let output = try MLMultiArray(shape: [1], dataType: .float32)

            var sumInput: Float = 0
            for i in 0..<input.shape[1].intValue {
                // Generate random value using the fixed seed generator
                let inputValue = Float(randomDistribution.nextInt()) / 1000.0
                input[[0, i] as [NSNumber]] = NSNumber(value: inputValue)
                sumInput += inputValue
            }

            output[0] = NSNumber(value: 10.0 * sumInput + 1.0)

            inputArray.append(input)
            outputArray.append(output)
        } catch {
            print("Error occurred while creating MLMultiArrays: \(error)")
        }
    }

    return (inputArray, outputArray)
}
func computeLoss(model: MLModel, data: ([MLMultiArray], [MLMultiArray])) -> Double {
    let (inputData, outputData) = data
    var totalLoss: Double = 0

    for (index, input) in inputData.enumerated() {
        let output = outputData[index]
        // Using optional binding to safely unwrap the prediction
        if let prediction = try? model.prediction(from: MLDictionaryFeatureProvider(dictionary: ["x": MLFeatureValue(multiArray: input)])),
           let predictedOutput = prediction.featureValue(for: "linear_1")?.multiArrayValue {
            // Now you can safely use predictedOutput
            let loss = (output[0].doubleValue - predictedOutput[0].doubleValue)
            totalLoss += loss * loss // Squared error for this sample
        }
    }

    return totalLoss / Double(inputData.count) // Mean of the squared errors
}
func trainModel() {
    // Load the updatable model
    guard let updatableModelURL = Bundle.main.url(forResource: "updatable_regression30", withExtension: "mlmodelc") else {
        print("Failed to load the updatable model")
        return
    }

    // Generate sample data
    let (inputData, outputData) = generateSampleData(numSamples: 200, seed: 8)
    let validationData = generateSampleData(numSamples: 100, seed: 18)

    // Create an MLArrayBatchProvider from the sample data
    var featureProviders = [MLFeatureProvider]()
    for (index, input) in inputData.enumerated() {
        let output = outputData[index]
        let dataPointFeatures: [String: MLFeatureValue] = [
            "x": MLFeatureValue(multiArray: input),
            "linear_1_true": MLFeatureValue(multiArray: output)
        ]
        if let provider = try? MLDictionaryFeatureProvider(dictionary: dataPointFeatures) {
            featureProviders.append(provider)
        }
    }
    let batchProvider = MLArrayBatchProvider(array: featureProviders)

    // Define progress handlers
    var lossValues: [Double] = []
    var computedTrainingLossValues: [Double] = []
    var validationLossValues: [Double] = []

    let progressHandlers = MLUpdateProgressHandlers(forEvents: [.trainingBegin, .epochEnd],
        progressHandler: { context in
            switch context.event {
            case .trainingBegin:
                print("Training began.")
            case .epochEnd:
                let loss = context.metrics[.lossValue] as! Double
                lossValues.append(loss)

                let validationLoss = computeLoss(model: context.model, data: validationData)
                validationLossValues.append(validationLoss)

                let computedTrainLoss = computeLoss(model: context.model, data: (inputData, outputData))
                computedTrainingLossValues.append(computedTrainLoss)
print("Epoch \(context.metrics[.epochIndex]!) ended. Training Loss: \(loss), Computed Training Loss: \(computedTrainLoss), Validation Loss: \(validationLoss)")
print("\(context.metrics[.lossValue]!)")
default:
break
}
},
completionHandler: { context in
if let error = context.task.error {
print("Update task failed with error: \(error)")
} else {
let updatedModel = context.model
do {
let fileManager = FileManager.default
let documentDirectory = try fileManager.url(for: .documentDirectory, in: .userDomainMask, appropriateFor:nil, create:true)
let fileURL = documentDirectory.appendingPathComponent("CatDog5.mlmodelc")
try updatedModel.write(to: fileURL)
print("Model updated and saved successfully to \(fileURL)")
} catch {
print("Failed to save the updated model: \(error)")
}
}
}
)
// Create an update task with progress handlers
let updateTask = try! MLUpdateTask(forModelAt: updatableModelURL,
trainingData: batchProvider,
configuration: nil,
progressHandlers: progressHandlers)
// Start the update task
updateTask.resume()
}
trainModel()
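To rule out a mismatch between the feature names in the batch provider and what the compiled model expects, a minimal sketch like the following (the function name inspectUpdatableModel is illustrative; it loads the same compiled model as trainModel() above) prints the model description:

func inspectUpdatableModel() {
    guard let url = Bundle.main.url(forResource: "updatable_regression30", withExtension: "mlmodelc"),
          let model = try? MLModel(contentsOf: url) else {
        print("Could not load the compiled model")
        return
    }
    let description = model.modelDescription
    print("isUpdatable: \(description.isUpdatable)")
    // Training inputs should include "x" and the target feature (expected to be "linear_1_true").
    for (name, feature) in description.trainingInputDescriptionsByName {
        print("Training input: \(name) -> \(feature)")
    }
    // Prediction outputs, e.g. "linear_1".
    for (name, feature) in description.outputDescriptionsByName {
        print("Output: \(name) -> \(feature)")
    }
}

If the target name printed here is not "linear_1_true", the keys used when building dataPointFeatures would need to change to match.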