I have a list of images. What is the fastest way to compare the images in the list using a similarity(imga, imgb) score and, after applying a threshold to the returned similarity, group them into a dictionary with the first item of each group as the key?


ImgList = [img1, img2, img3,img4, img5,img6]

If img1, img3 have similarity score 0.7 (>0.5)

If img2, img4, img6 have similarity 0.6 (>0.5)

Output = {img1:[img3], img2:[img4,img6], img5:[]}

My approach(index error):

for i in ImgList:
     for j in ImgList:
          #compare code here


def get_sim(img1, img2):
    """Return the similarity score of two images.

    Calls ``measure.compare_ssim`` with ``full=True`` and returns only the
    first element of the resulting pair; the second element (the diff) is
    discarded.

    NOTE(review): ``measure`` is presumably ``skimage.measure``; newer
    scikit-image releases expose this as
    ``skimage.metrics.structural_similarity`` -- confirm against the
    installed version.
    """
    ssim_score, _diff = measure.compare_ssim(img1, img2, full=True)
    return ssim_score

# Load the six input images from files 1.png .. 6.png in the working directory.
# NOTE(review): cv2.imread presumably yields None for an unreadable path --
# confirm and validate before passing the result to get_sim.
img1 = cv2.imread("1.png")
img2 = cv2.imread("2.png") 
img3 = cv2.imread("3.png")
img4 = cv2.imread("4.png") 
img5 = cv2.imread("5.png")
img6 = cv2.imread("6.png") 

# Keep the images in load order; the grouping below iterates over this list.
imgs = [img1,img2,img3,img4,img5,img6]

# Score every ordered pair of entries. Both loops walk the full list, so each
# image is also compared with itself and every pair is scored twice
# (once as i,j and once as j,i).
for i in imgs:
    for j in imgs:
        similarity = get_sim(i,j) # values in range 0 to 1
                #Need to group i,j

3 Answers

Woods Chen On

My former answer may not meet your requirement; this may work:

# Greedy threshold grouping.
# Every image that has not yet been placed in a group becomes the key of a new
# group and collects each later, still-unplaced image whose similarity to it
# exceeds the threshold.  Each pair is scored at most once and an image is
# never compared with itself.
#
# Groups are keyed by position in ImgList (use ImgList[k] to get the image
# back) because image arrays cannot be used as dict keys.
threshold = 0.5
res = {}
placed = set()  # indices that already belong to some group as members
for i, key_img in enumerate(ImgList):
    if i in placed:
        continue
    members = []
    for j in range(i + 1, len(ImgList)):
        if j in placed:
            continue
        if get_sim(key_img, ImgList[j]) > threshold:
            members.append(j)
            placed.add(j)
    res[i] = members

You can also write it more compactly with a list comprehension:

# Compact greedy threshold grouping.
# Each image not yet claimed by an earlier group becomes a key; the list
# comprehension gathers the later, unclaimed images that score above 0.5
# against it.  Keys and members are indices into ImgList, since image arrays
# cannot serve as dict keys.
res = {}
grouped = set()  # indices already claimed as members of an earlier group
for i, img in enumerate(ImgList):
    if i in grouped:
        continue
    res[i] = [
        j
        for j in range(i + 1, len(ImgList))
        if j not in grouped and get_sim(img, ImgList[j]) > 0.5
    ]
    # Claim the members only after the group is complete so later keys skip them.
    grouped.update(res[i])
zero On

Without any additional details:

Create a function that uses the similarity function to build the list of matches above a threshold, and then use that function inside a dictionary comprehension, like so:

def find_imgs_above_threshold(img, img_list, threshold=0.5, sim_func=None):
    """Return the images in *img_list* that are similar enough to *img*.

    Args:
        img: The reference image.
        img_list: Candidate images. The list is not modified.
        threshold: Minimum similarity score (inclusive) for a candidate to
            be returned.
        sim_func: Callable ``sim_func(a, b)`` returning a similarity score.
            Defaults to the module-level ``get_sim``.

    Returns:
        A new list with the candidates, in their original order, whose
        score against *img* is ``>= threshold``. *img* itself is skipped.
    """
    if sim_func is None:
        # Resolved at call time so the helper only has to exist when used.
        sim_func = get_sim
    # Skip the reference by identity rather than ``list.remove``: ``remove``
    # returns None, mutates the caller's list, and relies on ``==``, which is
    # ambiguous for array-like images.
    return [
        candidate
        for candidate in img_list
        if candidate is not img and sim_func(img, candidate) >= threshold
    ]

# Map each image's position in imgList to its matches. Positions are used as
# keys because image arrays are not hashable and cannot be dict keys; use
# imgList[idx] to recover the image.
img_dict = {idx: find_imgs_above_threshold(img, imgList) for idx, img in enumerate(imgList)}
Waket Zheng On
imgs = [cv2.imread(f"{i}.png") for i in range(1, 7)]

output = {}
score_img = {}

for img in imgs:
    score = get_sim(img)
    if score > 0.5:
        if score not in score_img:
            score_img[score] = img
            output[img] = []