How to index a list of points for faster searches of nearby points?

188 views Asked by At

For a list of (x, y) points, I am trying to find the nearby points for each point.

from collections import defaultdict
from math import sqrt
from random import randint

# Generate a list of random (x, y) points
points = [(randint(0, 100), randint(0, 100)) for _ in range(1000)]

def is_nearby(point_a, point_b, max_distance=5):
    """Two points are nearby if their Euclidean distance is less than max_distance"""
    distance = sqrt((point_b[0] - point_a[0])**2 + (point_b[1] - point_a[1])**2)
    return distance < max_distance

# For each point, find nearby points that are within a radius of 5
nearby_points = defaultdict(list)
for point in points:
    for neighbour in points:
        if point != neighbour:
            if is_nearby(point, neighbour):
                nearby_points[point].append(neighbour)

Is there any way I can index points to make the above search faster? I feel there must be some faster way than O(len(points)**2).

Edit: the points in general could be floats, not just ints

1

There are 1 answers

2
hiro protagonist On BEST ANSWER

this is a version with a fixed grid where each gridpoint holds the number of samples that are there.

the search can then be reduced to just the space around the point in question.

from random import randint
import math

N = 100
N_SAMPLES = 1000

# create the grid
grd = [[0 for _ in range(N)] for __ in range(N)]

# set the number of points at a given gridpoint
for _ in range(N_SAMPLES):
    grd[randint(0, 99)][randint(0, 99)] += 1

def find_neighbours(grid, point, distance):

    # this will be: (x, y): number of points there
    points = {}

    for x in range(point[0]-distance, point[0]+distance):
        if x < 0 or x > N-1:
            continue
        for y in range(point[1]-distance, point[1]+distance):
            if y < 0 or y > N-1:
                continue
            dst = math.hypot(point[0]-x, point[1]-y)
            if dst > distance:
                continue
            if grd[x][y] > 0:
                points[(x, y)] = grd[x][y]
    return points

print(find_neighbours(grid=grd, point=(45, 36), distance=5))
# -> {(44, 37): 1, (45, 33): 1, ...}
# meadning: there is one neighbour at (44, 37) etc...

for further optimzation: the tests for x and y could be precalculated for a given gridsize - the math.hypot(point[0]-x, point[1]-y) would not have to be done then for every point.

and it may be a good idea to replace the grid with a numpy array.


UPDATE

if your points are floats you can still create an int grid to reduce the search space:

from random import uniform
from collections import defaultdict
import math

class Point:
    def __init__(self, x, y):
        self.x = x
        self.y = y

    @property
    def x_int(self):
        return int(self.x)

    @property
    def y_int(self):
        return int(self.y)

    def __str__(self):
        fmt = '''{0.__class__.__name__}(x={0.x:5.2f}, y={0.y:5.2f})'''
        return fmt.format(self)

N = 100
MIN = 0
MAX = N-1

N_SAMPLES = 1000


# create the grid
grd = [[[] for _ in range(N)] for __ in range(N)]

# set the number of points at a given gridpoint
for _ in range(N_SAMPLES):
    p = Point(x=uniform(MIN, MAX), y=uniform(MIN, MAX))
    grd[p.x_int][p.y_int].append(p)


def find_neighbours(grid, point, distance):

    # this will be: (x_int, y_int): list of points
    points = defaultdict(list)

    # need to cast a slightly bigger net on the upper end of the range;
    # int() rounds down
    for x in range(point[0]-distance, point[0]+distance+1):
        if x < 0 or x > N-1:
            continue
        for y in range(point[1]-distance, point[1]+distance+1):
            if y < 0 or y > N-1:
                continue
            dst = math.hypot(point[0]-x, point[1]-y)
            if dst > distance + 1:  # account for rounding... is +1 enough?
                continue
            for pt in grd[x][y]:
                if math.hypot(pt.x-x, pt.y-y) <= distance:
                    points[(x, y)].append(pt)
    return points

res = find_neighbours(grid=grd, point=(45, 36), distance=5)

for int_point, points in res.items():
    print(int_point)
    for point in points:
        print('  ', point)

the output looks something like this:

(44, 36)
   Point(x=44.03, y=36.93)
(41, 36)
   Point(x=41.91, y=36.55)
   Point(x=41.73, y=36.53)
   Point(x=41.56, y=36.88)
...

for convenience Points is now a class. may not be necessary though...

depending on how dense or sparse your points are you could also represent the grid as a dictionary pointing to a list or Points...

also the find_neighbours function accepts a starting point consisting of ints only in that version. this might also be refined.

and there is much room for improvement: the range of the y axis can be restricted using trigonometry. and for the points way inside the circle there is no need for an individual check; detailed checking only needs to be done close to the outer rim of the circle.