Python 2D Gaussian Fit with NaN Values in Data

4.7k views Asked by At

I'm very new to Python but I'm trying to produce a 2D Gaussian fit for some data. Specifically, stellar fluxes linked to certain positions in a coordinate system/grid. However not all of the positions in my grid have corresponding flux values. I don't really want to set these values to zero in case it biases my fit, but I can't seem to set them to nan and still get my Gaussian fit to work. This is the code I'm using (modified slightly from here):

import numpy
import scipy
from numpy import *
from scipy import optimize

def gaussian(height, center_x, center_y, width_x, width_y):
    width_x = float(width_x)
    width_y = float(width_y)
    return lambda x,y: height*exp(-(((center_x-x)/width_x)**2+((center_y-y)/width_y)**2)/2)

def moments(data):
    total = nansum(data)
    X, Y = indices(data.shape)
    center_x = nansum(X*data)/total
    center_y = nansum(Y*data)/total
    row = data[int(center_x), :]
    col = data[:, int(center_y)]
    width_x = nansum(sqrt(abs((arange(col.size)-center_y)**2*col))/nansum(col))
    width_y = nansum(sqrt(abs((arange(row.size)-center_x)**2*row))/nansum(row))
    height = nanmax(data)
    return height, center_x, center_y, width_x, width_y

def fitgaussian(data):
    params = moments(data)
    errorfunction = lambda p: ravel(gaussian(*p)(*indices(data.shape)) - data)
    p, success = optimize.leastsq(errorfunction, params)
    return p

parameters = fitgaussian(data)
fit = gaussian(*parameters)

My flux values are in a 2D array called data. The code works if I have 0 instead of nan values in this array, but otherwise my parameters always come out as [nan nan nan nan nan]. If there's a way to fix this, I would really appreciate your insight! The more detailed the explanation, the better. Thanks in advance!

2

There are 2 answers

0
unutbu On BEST ANSWER

The obvious thing to do is remove the NaNs from data. Doing so, however, also requires that the corresponding positions in the 2D X, Y location arrays also be removed:

X, Y = np.indices(data.shape)
mask = ~np.isnan(data)
x = X[mask]
y = Y[mask]
data = data[mask]

Now you can use optimize.leastsq (or the newer, simpler optimize.curve_fit) to fit the data to the model function:

p, success = optimize.leastsq(errorfunction, params, args=(x, y, data))

For example, if we generate some random data with NaNs

data = make_data(shape)

so that

import matplotlib.pyplot as plt
plt.imshow(data)
plt.show()

looks like

enter image description here

with the white spots showing where there are NaN values, then

import numpy as np
from scipy import optimize
np.set_printoptions(precision=4)


def gaussian(p, x, y):
    height, center_x, center_y, width_x, width_y = p
    return height*np.exp(-(((center_x-x)/width_x)**2+((center_y-y)/width_y)**2)/2)

def moments(data):
    total = np.nansum(data)
    X, Y = np.indices(data.shape)
    center_x = np.nansum(X*data)/total
    center_y = np.nansum(Y*data)/total
    row = data[int(center_x), :]
    col = data[:, int(center_y)]
    width_x = np.nansum(np.sqrt(abs((np.arange(col.size)-center_y)**2*col))
                        /np.nansum(col))
    width_y = np.nansum(np.sqrt(abs((np.arange(row.size)-center_x)**2*row))
                        /np.nansum(row))
    height = np.nanmax(data)
    return height, center_x, center_y, width_x, width_y

def errorfunction(p, x, y, data):
    return gaussian(p, x, y) - data

def fitgaussian(data):
    params = moments(data)
    X, Y = np.indices(data.shape)
    mask = ~np.isnan(data)
    x = X[mask]
    y = Y[mask]
    data = data[mask]
    p, success = optimize.leastsq(errorfunction, params, args=(x, y, data))
    return p

def make_data(shape):
    h, w = shape
    p = 50, h/2.0, w/2.0, h/3.0, w/5.0
    print('Actual parameters: {}'.format(np.array(p)))
    X, Y = np.indices(shape)
    data = gaussian(p, X, Y) + np.random.random(shape)
    mask = np.random.random(shape) < 0.3
    data[mask] = np.nan
    return data

shape = 100, 200
data = make_data(shape)
X, Y = np.indices(shape)
parameters = fitgaussian(data)
print('Fitted parameters: {}'.format(parameters))
fit = gaussian(parameters, X, Y)

yields

Actual parameters: [  50.       50.      100.       33.3333   40.    ]
Fitted parameters: [ 50.2908  49.9992  99.9927  33.7039  40.6149]
0
Alexander McFarlane On

Just remove all the values that don't have a corresponding flux value. Removing pairs of values won't matter if you don't have anything on the y-axis at that point.

This should remove all values that don't have flux values if the empty values are equal to ''

# assumes data.shape = (1, 3) where data[:,0:1] is the x,y axis
# data[:,2] contains the flux values
data = numpy.delete(data, numpy.where(data[:,3] == ''), axis=0)

this will do the job if empty values are equal to nan

data = numpy.delete(data, numpy.where(data[:,3] == numpy.nan), axis=0)