How to fit 2 gauss in python

136 views Asked by At

I am a new user of Python. I am trying to fit 2 Gaussians with data but there are some errors in the results.

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy as scipy
from scipy import optimize
from matplotlib.ticker import AutoMinorLocator
from matplotlib import gridspec
import matplotlib.ticker as ticker
%matplotlib inline

data = np.loadtxt('csv/test_run09.csv', encoding="utf-8", delimiter=',',skiprows=1)
x = data[:,1]
y1 = data[:,2]
y2 = data[:,3]
y3 = data[:,4]
y4 = data[:,5]
y5 = data[:,6]
y6 = data[:,7]
y7 = data[:,8]
y8 = data[:,9]
y9 = data[:,10]
y10 = data[:,11]
y11 = data[:,12]
y12 = data[:,13]
y13 = data[:,14]
y14 = data[:,15]

amp1 = 2
sigma1 = 0.1
x_array = x[(x>33)&(x<34)]
y_array = y14[(x>33)&(x<34)]

amp2 = np.max(y_array)
sigma2 = np.std(x_array)

def _1gaussian1(x_array, amp1, sigma1):
    return amp1*(1/(sigma1*(np.sqrt(2*np.pi))))*(np.exp((-1.0/2.0)*(((x_array-\  33.49290958)/sigma1)**2))) + 0.2
def _1gaussian2(x_array, amp2, sigma2):
    return amp2*(1/(sigma2*(np.sqrt(2*np.pi))))*(np.exp((-1.0/2.0)*(((x_array-\    33.6312849)/sigma2)**2))) + 0.2

popt_gauss1, pcov_gauss1 = scipy.optimize.curve_fit(_1gaussian1, x_array, y_array,     p0=[amp1, sigma1])
popt_gauss2, pcov_gauss2 = scipy.optimize.curve_fit(_1gaussian2, x_array, y_array,     p0=[np.max(y_array), np.std(x_array)])

def _2gaussian(x_array, amp1, sigma1, amp2, sigma2):
    return amp1*(1/(sigma1*(np.sqrt(2*np.pi))))*(np.exp((-1.0/2.0)*(((x_array-    33.49290958)/sigma1)**2))) + amp2*(1/(sigma1*(np.sqrt(2*np.pi))))*(np.exp((-1.0/2.0)*(((x_array-33.6312849)/sigma2)**2))) + 0.3

popt_2gauss, pcov_2gauss = scipy.optimize.curve_fit(_2gaussian, x_array, y_array,         p0=[amp1, sigma1, np.max(y_array), np.std(x_array)])
perr_2gauss = np.sqrt(np.diag(pcov_2gauss))
print(popt_2gauss)

pars_1 = popt_2gauss[0:2]
pars_2 = popt_2gauss[2:4]
gauss_peak_1 = _1gaussian1(x_array, *pars_1)
gauss_peak_2 = _1gaussian2(x_array, *pars_2)

fig = plt.figure(figsize=(7,5))
gs = gridspec.GridSpec(1,1)
ax1 = fig.add_subplot(gs[0])
plt.grid()

ax1.plot(x_array, y_array, "ro")
ax1.plot(x_array, _2gaussian(x_array, *popt_2gauss), 'k--')#,\

# # peak 1
ax1.plot(x_array, gauss_peak_1, "g")
ax1.fill_between(x_array, gauss_peak_1.min(), gauss_peak_1, facecolor="green",         alpha=0.5)

# # peak 2
ax1.plot(x_array, gauss_peak_2, "y")
ax1.fill_between(x_array, gauss_peak_2.min(), gauss_peak_2, facecolor="yellow",\                  alpha=0.5)  

# prints the fitting parameters with their errors
print("-------------Peak 1-------------")
print("amplitude = %0.2f (+/-) %0.2f" % (pars_1[0], perr_2gauss[0]))
print("sigma = %0.2f (+/-) %0.2f" % (pars_1[1], perr_2gauss[1]))
print("area = %0.2f" % np.trapz(gauss_peak_1))
print("-------------Peak 2-------------")
print("amplitude = %0.2f (+/-) %0.2f" % (pars_2[0], perr_2gauss[2]))
print("sigma = %0.2f (+/-) %0.2f" % (pars_2[1], perr_2gauss[3]))
print("area = %0.2f" % np.trapz(gauss_peak_2))

This is the result. I can plot the gauss fitting but the 2nd gauss seems to be wrong because the shape is much larger than the data. What should I do in this case?

enter image description here

1

There are 1 answers

1
Reinderien On

Don't rewrite _1gaussian2 as a near-identical implementation of _1gaussian1.

Don't hard-code your time offsets - unless these come from real experimental settings, leave them as degrees of freedom for your fit. Same for the 0.2 vertical offset.

Don't throw away your power data (the column headings). If I interpret them correctly, they're positive or negative power in milliwatts.

Do a parametric fit for every power level; you'll see that the parameters you had fixed should actually be variable.

Don't hard-code the slice 33-34. Use the whole data. Model the function as a simultaneous sum of two Gaussians.

import re
from typing import Iterator

import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit


sqrt2pi = np.sqrt(2 * np.pi)


def parse_power(filename: str) -> Iterator[float]:
    pat = re.compile(r'(neg|pos)_(\d+)_(\d+)mW')
    with open(filename) as f:
        header = next(f)
        for match in pat.finditer(header):
            magnitude = 1e-3 * float(match.expand(r'\2.\3'))
            if match.group(1) == 'neg':
                yield -magnitude
            else:
                yield magnitude


def gaussian(theta: np.ndarray, amp: float, offx: float, sigma: float) -> np.ndarray:
    return amp * np.exp(-0.5 * ((theta - offx) / sigma)**2)


def double_gaussian(
    theta: np.ndarray,
    amp1: float, amp2: float,
    offx1: float, offx2: float,
    sigma1: float, sigma2: float,
    offy: float) -> np.ndarray:
    return (
        offy
        + gaussian(theta, amp1, offx1, sigma1)
        + gaussian(theta, amp2, offx2, sigma2)
    )


def main() -> None:
    filename = 'test_run09.csv'
    powers = np.array(tuple(parse_power(filename)))
    order = powers.argsort()
    powers = powers[order]
    data = np.loadtxt(filename, encoding="utf-8", delimiter=',', skiprows=1)
    theta = data[:, 1]
    # Sort columns by power
    data[:, 2:] = data[:, 2:][:, order]

    amp01 = 3
    amp02 = 9
    offx01 = 32.2
    offx02 = 33.5
    sigma01 = 0.1
    sigma02 = 0.1
    offy0 = 0.2
    init = (amp01, amp02, offx01, offx02, sigma01, sigma02, offy0)

    parameters = np.empty((len(powers), len(init)))

    for j in range(2, data.shape[1]):
        y = data[:, j]

        result, _ = curve_fit(
            f=double_gaussian,
            xdata=theta,
            ydata=y,
            p0=init,
        )
        parameters[j-2, :] = result

        def each_fit_plot():
            plt.figure()
            plt.scatter(theta, y)
            plt.plot(theta, double_gaussian(theta, *result), 'orange')
        # each_fit_plot()

    def plot_param(ylabel, start, end):
        plt.figure()
        plt.plot(powers, parameters[:, start:end])
        plt.title('Parameters, first and second Gaussian pulse')
        plt.xlabel('Power')
        plt.ylabel(ylabel)

    plot_param('Amplitude', 0, 2)
    plot_param('Peak time', 2, 4)
    plot_param('Sigma', 4, 6)
    plot_param('Shared vertical offset', 6, 7)

    plt.show()


if __name__ == '__main__':
    main()

parameters

Curve fit

If you generalise to a pseudogaussian pulse replacing **2 with abs()**p, the fit improves greatly:

pseudogaussian