Modifying values in .pdb file and save as new

Question

Modifying values in .pdb file and save as new

87 views Asked by Irene Ferfoglia At 11 March 2024 at 15:53

for an assignment I have to read a small .pdb file in Python, change some values, and save it as a new .pdb file.
My original file is like this:

ATOM     19  HD2 TYR     1      26.910  61.717  39.871  1.00  0.00           H  
ATOM     20  C   TYR     1      29.796  62.354  41.246  1.00  0.00           C  
ATOM     23  H   SER     2      30.611  61.950  39.410  1.00  0.00           H  
ATOM     24  CA  SER     2      30.082  64.035  39.354  1.00  0.00           C 
END

I have tried with Pandas but with no success, as I cannot save it in the desired extension without it also saving an index, which I don't want (I used .to_csv('newfile.pdb')).
I have found a module called BioPython, and this is my attempt with it:

from Bio.PDB import PDBParser, PDBIO

def translate_atoms(structure, resname, translation_vector):
    for model in structure:
        for chain in model:
            for residue in chain:
                if residue.resname == resname:
                    for atom in residue:
                        atom.coord += translation_vector

pdb_file = "pdb1.pdb"
# Read the PDB file
pdb_parser = PDBParser(QUIET=True)
structure = pdb_parser.get_structure("original_pdb1", pdb_file)

# Translation vector for x direction (0.55 nm, so 5.5Å)
translation_vector = [5.5, 0, 0]

# Translate all atoms of SER residue in x direction
translate_atoms(structure, "SER", translation_vector)

# Write the modified structure to a new PDB file
output_pdb = "modified_pdb1.pdb"
pdb_io = PDBIO()
pdb_io.set_structure(structure)
pdb_io.save(output_pdb)

This does what I want as far as changing the values but when I save it adds an unwanted line, like so:

ATOM     19  HD2 TYR     1      26.910  61.717  39.871  1.00  0.00           H  
ATOM     20  C   TYR     1      29.796  62.354  41.246  1.00  0.00           C  
ATOM     23  H   SER     2      36.111  61.950  39.410  1.00  0.00           H  
ATOM     24  CA  SER     2      35.582  64.035  39.354  1.00  0.00           C  
TER      33      SER     2
END

How can I save it without that last line?

Thank you for your help!

Original Q&A

There are 2 answers

pippo1980 On 12 March 2024 at 17:51

Ok my attempt , using your input


from Bio.PDB import PDBParser, PDBIO

import Bio

print('Biopython version : ', Bio.__version__)


def translate_atoms(structure, resname, translation_vector):
    for model in structure:
        for chain in model:
            for residue in chain:
                if residue.resname == resname:
                    for atom in residue:
                        atom.coord += translation_vector

pdb_file = "input.pdb"
# Read the PDB file
pdb_parser = PDBParser(QUIET=True)
structure = pdb_parser.get_structure("original_pdb1", pdb_file)

# Translation vector for x direction (0.55 nm, so 5.5Å)
translation_vector = [5.5, 0, 0]

# Translate all atoms of SER residue in x direction
translate_atoms(structure, "SER", translation_vector)

# Write the modified structure to a new PDB file
output_pdb = "modified_pdb1.pdb"
pdb_io = PDBIO()
pdb_io.set_structure(structure)
pdb_io.save(output_pdb , write_end = False , preserve_atom_numbering = True)

will give :

ATOM     19  HD2 TYR     1      26.910  61.717  39.871  1.00  0.00           H  
ATOM     20  C   TYR     1      29.796  62.354  41.246  1.00  0.00           C  
ATOM     23  H   SER     2      36.111  61.950  39.410  1.00  0.00           H  
ATOM     24  CA  SER     2      35.582  64.035  39.354  1.00  0.00           C  
TER      25      SER     2

thanks to write_end = False in pdb_io.save(output_pdb , write_end = False , preserve_atom_numbering = True)

see Documentation class Bio.PDB.PDBIO.PDBIO(use_model_flag=0)

Longer code :


from Bio.PDB import PDBParser, PDBIO, Select

import Bio

from Bio.PDB.PDBExceptions import PDBIOException

print('Biopython version : ', Bio.__version__)

class PDBIO(PDBIO):
    
    def save(self, file, select = Select(), write_end=True, preserve_atom_numbering=False):
        """Save structure to a file.

        :param file: output file
        :type file: string or filehandle

        :param select: selects which entities will be written.
        :type select: object

        Typically select is a subclass of L{Select}, it should
        have the following methods:

         - accept_model(model)
         - accept_chain(chain)
         - accept_residue(residue)
         - accept_atom(atom)

        These methods should return 1 if the entity is to be
        written out, 0 otherwise.

        Typically select is a subclass of L{Select}.
        """
        if isinstance(file, str):
            fhandle = open(file, "w")
        else:
            # filehandle, I hope :-)
            fhandle = file

        get_atom_line = self._get_atom_line

        # multiple models?
        if len(self.structure) > 1 or self.use_model_flag:
            model_flag = 1
        else:
            model_flag = 0

        for model in self.structure.get_list():
            if not select.accept_model(model):
                continue
            # necessary for ENDMDL
            # do not write ENDMDL if no residues were written
            # for this model
            model_residues_written = 0
            if not preserve_atom_numbering:
                atom_number = 1
            if model_flag:
                fhandle.write(f"MODEL      {model.serial_num}\n")

            for chain in model.get_list():
                if not select.accept_chain(chain):
                    continue
                chain_id = chain.id
                if len(chain_id) > 1:
                    e = f"Chain id ('{chain_id}') exceeds PDB format limit."
                    raise PDBIOException(e)

                # necessary for TER
                # do not write TER if no residues were written
                # for this chain
                chain_residues_written = 0

                for residue in chain.get_unpacked_list():
                    if not select.accept_residue(residue):
                        continue
                    hetfield, resseq, icode = residue.id
                    resname = residue.resname
                    segid = residue.segid
                    resid = residue.id[1]
                    if resid > 9999:
                        e = f"Residue number ('{resid}') exceeds PDB format limit."
                        raise PDBIOException(e)

                    for atom in residue.get_unpacked_list():
                        if not select.accept_atom(atom):
                            continue
                        chain_residues_written = 1
                        model_residues_written = 1
                        if preserve_atom_numbering:
                            atom_number = atom.serial_number

                        try:
                            s = get_atom_line(
                                atom,
                                hetfield,
                                segid,
                                atom_number,
                                resname,
                                resseq,
                                icode,
                                chain_id,
                            )
                        except Exception as err:
                            # catch and re-raise with more information
                            raise PDBIOException(
                                f"Error when writing atom {atom.full_id}"
                            ) from err
                        else:
                            fhandle.write(s)
                            # inconsequential if preserve_atom_numbering is True
                            atom_number += 1

                # if chain_residues_written:
                #     fhandle.write(
                #         _TER_FORMAT_STRING
                #         % (atom_number, resname, chain_id, resseq, icode)
                #     )

            if model_flag and model_residues_written:
                fhandle.write("ENDMDL\n")
        if write_end:
            fhandle.write("END   \n")

        if isinstance(file, str):
            fhandle.close()


def translate_atoms(structure, resname, translation_vector):
    for model in structure:
        for chain in model:
            for residue in chain:
                if residue.resname == resname:
                    for atom in residue:
                        atom.coord += translation_vector

pdb_file = "input.pdb"
# Read the PDB file
pdb_parser = PDBParser(QUIET=True)
structure = pdb_parser.get_structure("original_pdb1", pdb_file)

# Translation vector for x direction (0.55 nm, so 5.5Å)
translation_vector = [5.5, 0, 0]

# Translate all atoms of SER residue in x direction
translate_atoms(structure, "SER", translation_vector)

# Write the modified structure to a new PDB file
output_pdb = "modified_pdb1.pdb"
pdb_io = PDBIO()
pdb_io.set_structure(structure)
pdb_io.save(output_pdb , write_end = False ,, preserve_atom_numbering = True)

will give :

ATOM     19  HD2 TYR     1      26.910  61.717  39.871  1.00  0.00           H  
ATOM     20  C   TYR     1      29.796  62.354  41.246  1.00  0.00           C  
ATOM     23  H   SER     2      36.111  61.950  39.410  1.00  0.00           H  
ATOM     24  CA  SER     2      35.582  64.035  39.354  1.00  0.00           C

because I monkey patched PDBIO class PDBIO(StructureIO) deleting :

# if chain_residues_written:
#     fhandle.write(
#         _TER_FORMAT_STRING
#         % (atom_number, resname, chain_id, resseq, icode)
#     )

need to import : from Bio.PDB.PDBExceptions import PDBIOException though

A side note:

your original code is incorrect missing : preserve_atom_numbering = True

in pdb_io.save(output_pdb) at least for last stable Biopython

**Umar** · Accepted Answer · 2024-03-12T13:43:42+00:00

I have dded a new function save_without_ter_end to save the modified structure without TER and END lines. The save_without_ter_end function constructs each line of the PDB file manually, ensuring that only ATOM records are written and appending the END line at the end. I removed the usage of PDBIO since it didn't provide the desired result of excluding TER and END lines.

from Bio.PDB import PDBParser, PDBIO

def translate_atoms(structure, resname, translation_vector):
    for model in structure:
        for chain in model:
            for residue in chain:
                if residue.resname == resname:
                    for atom in residue:
                        atom.coord += translation_vector

def save_without_ter_end(structure, filename):
    with open(filename, 'w') as pdb_file:
        for model in structure:
            for chain in model:
                for residue in chain:
                    for atom in residue:
                        atom_line = f"ATOM  {atom.serial_number:<5d} {atom.get_name():>4s} {residue.resname:>3s} {chain.id}{residue.id[1]:>4d}    {atom.coord[0]:8.3f}{atom.coord[1]:8.3f}{atom.coord[2]:8.3f}{atom.occupancy:6.2f}{atom.bfactor:6.2f}          {atom.element:>2s}\n"
                        pdb_file.write(atom_line)
        pdb_file.write("END\n")


pdb_file = "pdb1.pdb"
# Read the PDB file
pdb_parser = PDBParser(QUIET=True)
structure = pdb_parser.get_structure("original_pdb1", pdb_file)

# Translation vector for x direction (0.55 nm, so 5.5Å)
translation_vector = [5.5, 0, 0]

# Translate all atoms of SER residue in x direction
translate_atoms(structure, "SER", translation_vector)

# Write the modified structure to a new PDB file without TER and END lines
output_pdb = "modified_pdb1.pdb"
save_without_ter_end(structure, output_pdb)


ATOM  19     HD2 TYR     1      26.910  61.717  39.871  1.00  0.00           H
ATOM  20       C TYR     1      29.796  62.354  41.246  1.00  0.00           C
ATOM  23       H SER     2      36.111  61.950  39.410  1.00  0.00           H
ATOM  24      CA SER     2      35.582  64.035  39.354  1.00  0.00           C
END

or simply add this in your command. This code iterates over each atom in the structure and manually writes the PDB file without including the TER and END line.

# Manually write the PDB file
with open(output_pdb, 'w') as pdb_file:
    for model in structure:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    atom_line = f"{atom.get_id():6s}{atom.get_name():5s}{residue.resname:>4s} {chain.id:4s}{residue.id[1]:>4d}    {atom.coord[0]:8.3f}{atom.coord[1]:8.3f}{atom.coord[2]:8.3f}{atom.occupancy:6.2f}{atom.bfactor:6.2f}          {atom.element:>2s}\n"
                    pdb_file.write(atom_line)

or u can acheive same by following

import pandas as pd
from Bio.PDB import PDBParser, PDBIO
# Read the original PDB file into a DataFrame
pdb_file = "pdb1.pdb"
columns = ["record_name", "atom_number", "atom_name", "residue_name", "chain_id", "residue_number", "x", "y", "z", "occupancy", "b_factor", "element_symbol", "charge"]
df = pd.read_csv(pdb_file, delim_whitespace=True, names=columns)

def translate_atoms(structure, resname, translation_vector):
    for model in structure:
        for chain in model:
            for residue in chain:
                if residue.resname == resname:
                    for atom in residue:
                        atom.coord += translation_vector

# Read the original PDB file
pdb_file = "pdb1.pdb"
pdb_parser = PDBParser(QUIET=True)
structure = pdb_parser.get_structure("original_pdb1", pdb_file)

# Define translation vector
translation_vector = [5.5, 0, 0]

# Translate all atoms of SER residue in x direction
translate_atoms(structure, "SER", translation_vector)

# Write the modified structure to a new PDB file
output_pdb = "modified_pdb1.pdb"
pdb_io = PDBIO()
pdb_io.set_structure(structure)


with open(output_pdb, 'w') as pdb_file:
    for model in structure:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    atom_line = f"ATOM  {atom.serial_number:>4d} {atom.element:>2s} {residue.resname:>4s} {chain.id:4s}{residue.id[1]:>4d}    {atom.coord[0]:8.3f}{atom.coord[1]:8.3f}{atom.coord[2]:8.3f}{atom.occupancy:6.2f}{atom.bfactor:6.2f}          {atom.element:>2s}\n"
                    pdb_file.write(atom_line)
    pdb_file.write("END\n")

print("Modified PDB file saved successfully.")

or

from Bio.PDB import PDBParser, PDBIO
import pandas as pd

# Read the original PDB file
pdb_file = "pdb1.pdb"
pdb_parser = PDBParser(QUIET=True)
structure = pdb_parser.get_structure("original_pdb1", pdb_file)

# Create an empty DataFrame to store the atom information
atom_data = []

# Iterate over the structure and populate the DataFrame with atom information
for model in structure:
    for chain in model:
        for residue in chain:
            for atom in residue:
                atom_data.append([
                    atom.serial_number,
                    atom.element,
                    residue.resname,
                    chain.id,
                    residue.id[1],
                    atom.coord[0],
                    atom.coord[1],
                    atom.coord[2],
                    atom.occupancy,
                    atom.bfactor,
                    atom.element
                ])

# Create a DataFrame from the collected atom data
columns = ["atom_number", "atom_name", "residue_name", "chain_id", "residue_number", "x", "y", "z", "occupancy", "b_factor", "charge"]
df = pd.DataFrame(atom_data, columns=columns)
# Define translation vector
translation_vector = [5.5, 0, 0]

# Translate all atoms of SER residue in x direction
ser_mask = df["residue_name"] == "SER"
df.loc[ser_mask, ["x", "y", "z"]] += translation_vector

# Save the modified DataFrame to a new PDB file
output_pdb = "modified_pdb1.pdb"
output_pdb = "modified_pdb1.pdb"
# Save the DataFrame to a new PDB file
output_pdb = "modified_pdb1.pdb"
with open(output_pdb, 'w') as pdb_file:
    for index, row in df.iterrows():
        atom_line = f"ATOM  {row['atom_number']:>4d} {row['atom_name']:>2s} {row['residue_name']:>4s} {row['chain_id']:4s}{row['residue_number']:>4d}    {row['x']:8.3f}{row['y']:8.3f}{row['z']:8.3f}{row['occupancy']:6.2f}{row['b_factor']:6.2f}          {row['charge']:>2s}\n"
        pdb_file.write(atom_line)
    pdb_file.write("END\n")

print("Modified PDB file saved successfully.")

TechQA.

Modifying values in .pdb file and save as new

There are 2 answers

Related Questions in PYTHON

Related Questions in BIOINFORMATICS

Related Questions in BIOPYTHON

Popular Questions

Trending Questions