MACCS Fingerprint

221 views Asked by At

When I perform a Python heatmap of similarity search I can't seem to get the heatmap that actually creates a comparison. But instead I am getting a comparison it says that everything has a similarity of one regardless of if it even if it is a similarity or a dissimilarity search. Here is what I have for the code.

import matplotlib
import os
from os.path import join
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import DataStructs
import numpy as np
from rdkit.Chem import MACCSkeys
from rdkit.Chem.AtomPairs import Pairs
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw
import seaborn as sns
import pandas as pd
from matplotlib.colors import LogNorm
import matplotlib.pyplot as plt
from rdkit.DataManip.Metric import GetTanimotoDistMat
from rdkit.DataManip.Metric import GetTanimotoSimMat

from rdkit import rdBase
from rdkit.Chem import RDConfig

suppl=Chem.SDMolSupplier('/Users/emmafath/Desktop/toxicitydata_toxic_20.3.sdf')
sdfdir = os.path.join(RDConfig.RDDocsDir, '/Users/emmafath/Desktop/toxicitydata_toxic_20.3.sdf')

mols = [x for x in suppl]

Draw.MolsToGridImage(mols[:23], molsPerRow=5)

morganfps = [AllChem.GetMorganFingerprintAsBitVect(m,2) for m in mols]
maccsfps = [MACCSkeys.GenMACCSKeys(x) for x in mols]

distmat = GetTanimotoDistMat(maccsfps)
simmat = GetTanimotoSimMat(maccsfps)

def tri2mat(tri_arr):
    n = len(tri_arr)
    m = int((np.sqrt(1 + 4*2*n)+1)/2)
    arr = np.ones([m, m])
    for i in range(m):
        for j in range(i):
            arr[i][j] = tri_arr[i + j - 1]
            arr[j][i] = tri_arr[i + j -1]
        return arr

distarr = tri2mat(distmat)
simmat = tri2mat(simmat)

heatmapdis = sns.heatmap(distarr[:,:])
heatmapdis.set_title('Dissimilarity using MACCS fingerprints', fontdict={'fontsize':12},pad=12);
#plt.ylabel('')
#plot.xlabel('')
#plot.savefig('heatmap_dissimilarity_MACCS.png', dpi=300)

heatmapsim= sns.heatmap(simmat[:,:])
heatmapsim.set_title('Similary using MACCS fingerprint', fontdict={'fontsize':12}, pad=12)

I expected to get a normal heatmap, but this is what I got.

1

There are 1 answers

0
wikke On

The problem is with your tri2mat function. tri2mat([1,2,3,4]) returns:

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

replace the function with

def tri2mat(tri_arr):
    n = len(tri_arr)
    m = int((np.sqrt(1 + 4 * 2 * n) + 1) / 2)
    arr = np.ones([m, m])
    counter=0
    for i in range(m):
        for j in range(i):
            arr[i][j] = tri_arr[counter]
            arr[j][i] = tri_arr[counter]
            counter+=1
    return arr

now it works.