Gaussian fit with consideration of uncertainties

1.2k views Asked by At

I'm having trouble understandig what is wrong with the following piece of code:

import numpy as np
import matplotlib.pyplot as plt
from scipy.odr import *

def gauss(p,x):
    return p[0]*np.exp(-(x-p[1])**2/(2*p[2]**2)+p[4]) + p[3]

# Create a model for fitting.
gg = Model(gauss)

x = np.arange(0, 350)

# Create a RealData object using our initiated data from above.
data = RealData(x, y_data, sx=0, sy=y_data_err)

# Set up ODR with the model and data.
odr = ODR(data, gg, beta0=[0.1, 1., 1.0, 1.0, 1.0])


# Run the regression.
out = odr.run()

# Use the in-built pprint method to give us results.
out.pprint()

x_fit = np.linspace(x[0], x[-1], 1000)
y_fit = gauss(out.beta, x_fit)

plt.figure()
plt.errorbar(x, xy_data xerr=0, yerr=y_data_err, linestyle='None', marker='x')
plt.plot(x_fit, y_fit)

plt.show()

This was straight up copied from here with only changing the model. The error that I get is

scipy.odr.odrpack.odr_error: number of observations do not match

But as far as I can tell beta0 has five parameters, which is exactly as many as gauss needs to work. Would be great if someone could point to the error-source or my misconception.

1

There are 1 answers

0
James Phillips On BEST ANSWER

Here is a graphing fitter with your equation, comparing both ODR and curve_fit on one graph. The example uses scipy's differential_evolution genetic algorithm module to determine initial parameter estimates for the solvers, and that module implements the Latin Hypercube algorithm to ensure a thorough search of parameter space which requires bounds within which to search. In this example, those bounds are taken from the data maximum and minimum values. As your post did not include data, I have used my own test data in the example. In this example the two fitted curves look very similar, diverging slightly at the plotted extremes. comparison

import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import scipy.odr
from scipy.optimize import differential_evolution
import warnings

xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])


def func(x, a, b, c, d, offset): # curve fitting function for curve_fit()
    return a*numpy.exp(-(x-b)**2/(2*c**2)+d) + offset


def func_wrapper_for_ODR(parameters, x): # parameter order for ODR
    return func(x, *parameters)


# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
    val = func(xData, *parameterTuple)
    return numpy.sum((yData - val) ** 2.0)


def generate_Initial_Parameters():
    # min and max used for bounds
    maxX = max(xData)
    minX = min(xData)
    maxY = max(yData)
    minY = min(yData)

    parameterBounds = []
    parameterBounds.append([minY, maxY]) # search bounds for a
    parameterBounds.append([minX, maxX]) # search bounds for b
    parameterBounds.append([minX, maxX]) # search bounds for c
    parameterBounds.append([minY, maxY]) # search bounds for d
    parameterBounds.append([0.0, maxY]) # search bounds for Offset

    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x

geneticParameters = generate_Initial_Parameters()


##########################
# curve_fit section
##########################

fittedParameters_curvefit, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters curve_fit:', fittedParameters_curvefit)
print()

modelPredictions_curvefit = func(xData, *fittedParameters_curvefit) 

absError_curvefit = modelPredictions_curvefit - yData

SE_curvefit = numpy.square(absError_curvefit) # squared errors
MSE_curvefit = numpy.mean(SE_curvefit) # mean squared errors
RMSE_curvefit = numpy.sqrt(MSE_curvefit) # Root Mean Squared Error, RMSE
Rsquared_curvefit = 1.0 - (numpy.var(absError_curvefit) / numpy.var(yData))

print()
print('RMSE curve_fit:', RMSE_curvefit)
print('R-squared curve_fit:', Rsquared_curvefit)

print()


##########################
# ODR section
##########################
data = scipy.odr.odrpack.Data(xData,yData)
model = scipy.odr.odrpack.Model(func_wrapper_for_ODR)
odr = scipy.odr.odrpack.ODR(data, model, beta0=geneticParameters)

# Run the regression.
odr_out = odr.run()

print('Fitted parameters ODR:', odr_out.beta)
print()

modelPredictions_odr = func(xData, *odr_out.beta) 

absError_odr = modelPredictions_odr - yData

SE_odr = numpy.square(absError_odr) # squared errors
MSE_odr = numpy.mean(SE_odr) # mean squared errors
RMSE_odr = numpy.sqrt(MSE_odr) # Root Mean Squared Error, RMSE
Rsquared_odr = 1.0 - (numpy.var(absError_odr) / numpy.var(yData))

print()
print('RMSE ODR:', RMSE_odr)
print('R-squared ODR:', Rsquared_odr)

print()


##########################################################
# graphics output section
def ModelsAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData,  'D')

    # create data for the fitted equation plots
    xModel = numpy.linspace(min(xData), max(xData))
    yModel_curvefit = func(xModel, *fittedParameters_curvefit)
    yModel_odr = func(xModel, *odr_out.beta)

    # now the models as line plots
    axes.plot(xModel, yModel_curvefit)
    axes.plot(xModel, yModel_odr)

    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot

graphWidth = 800
graphHeight = 600
ModelsAndScatterPlot(graphWidth, graphHeight)