Extracting lines from a text in python

149 views Asked by At

I am trying to extract the lines that start with these entries and write them to a new file. This is what I have:

def ReadFileContents():
    """Read 'Testing.pdb' and write collected lines to 'list3.txt'.

    As written, only lines starting with 'TER' are ever collected: every
    other prefix check is nested inside the 'TER' branch (see the comments
    below).  The input file is opened but never explicitly closed.
    """
    file_content = open('Testing.pdb')
    all_file_content = file_content.readlines()
    list3 = []
    for line in all_file_content:
        # NOTE: the `enter code here` text on the next line is an editor
        # placeholder left over from pasting; it is not valid Python.
        if line.startswith('TER'):`enter code here`
            list3.append(line)
            # Everything below is indented under the 'TER' check, so it only
            # runs for lines that start with 'TER'.  Such a line cannot also
            # start with 'HETATM', so none of the nested branches can run.
            if line.startswith('HETATM'):
                list3.apped(line)  # NOTE: `apped` looks like a typo for `append`
                if line.startswith('CONECT'):
                    list3.append(line)
                    if line.startswith('MASTER'):
                        list3.append(line)
                        if line.startswith('END'):
                            list3.append(line)
            # Still inside the 'TER' branch of the loop: the output file is
            # reopened in 'w' mode and rewritten each time a 'TER' line is seen.
            file = open('list3.txt', 'w')
            # This inner loop reuses the name `line` from the outer loop.
            for line in list3:
                # Lines returned by readlines() normally keep their trailing
                # newline, so appending '\n' adds a second one.
                file.write(line + '\n')
            file.close()

# Run the extraction when this script is executed.
ReadFileContents()

The problem is that it only creates the list3.txt file with the lines that start with TER. Any ideas why?

Thanks, Pedro

3

There are 3 answers

1
Nemoden On

Spaces at the beginning of a line denote the code block level in Python. Suppose you have the following code:

if A:
    #do something
    if B:
        #do another thing

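"do another thing" will run only if condition A is True, because it is nested inside `if A:`. In your code every check is nested inside the `TER` check, so the other checks can never succeed. Put each `if` at the same indentation level instead: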

def ReadFileContents():
    """Copy selected records from 'Testing.pdb' into 'list3.txt'.

    A line is kept when it starts with one of the record names 'TER',
    'HETATM', 'CONECT', 'MASTER' or 'END'.  Kept lines are written in
    their original order, one record per output line.

    Raises whatever ``open`` raises (e.g. when 'Testing.pdb' is missing).
    """
    # Each prefix is tested at the same level (nothing is nested), so every
    # record name is checked for every line.  str.startswith accepts a tuple,
    # which expresses "any of these prefixes" in a single call.
    wanted = ('TER', 'HETATM', 'CONECT', 'MASTER', 'END')

    # `with` closes the input file even if reading fails.
    with open('Testing.pdb') as file_content:
        list3 = [line for line in file_content if line.startswith(wanted)]

    with open('list3.txt', 'w') as out:
        for line in list3:
            # Lines read from the file already end with '\n'; strip it before
            # re-adding one so the output is not double-spaced, while a final
            # line that lacks a newline still gets terminated.
            out.write(line.rstrip('\n') + '\n')
0
Vishnu Upadhyay On

You had better use elif rather than nested if. It is also always better to open the file with a context manager (`with`), so that you don't need to close the file manually.

change this:-

file_content = open('Testing.pdb')
all_file_content = file_content.readlines()

to this:-

with open('Testing.pdb') as file_content:

Then complete code is:-

def ReadFileContents():
    """Copy selected records from 'Testing.pdb' into 'list3.txt' and print them.

    A line is kept when it starts with 'TER', 'HETATM', 'CONECT', 'MASTER'
    or 'END'.  After writing, the output file is read back and printed.

    Raises whatever ``open`` raises (e.g. when 'Testing.pdb' is missing).
    """
    list3 = []
    with open('Testing.pdb') as file_content:  # Use `with`.
        # Iterating the file object yields the same lines as readlines()
        # without building an intermediate list.
        for line in file_content:
            if line.startswith('TER'):
                list3.append(line)
            elif line.startswith('HETATM'):
                list3.append(line)
            elif line.startswith('CONECT'):
                list3.append(line)
            elif line.startswith('MASTER'):
                list3.append(line)
            elif line.startswith('END'):
                list3.append(line)

    # 'w+' truncates the file like 'w' but also allows reading, which the
    # read-back below needs; a file opened with plain 'w' cannot be read.
    with open('list3.txt', 'w+') as f:
        for line in list3:
            # Lines read from the file already end with '\n'; strip it before
            # re-adding one so the output is not double-spaced.
            f.write(line.rstrip('\n') + '\n')

        # Read the file
        f.seek(0)
        # Parenthesised single-argument print works on Python 2 and 3.
        print(f.read())

# Run the extraction; 'Testing.pdb' is opened relative to the current directory.
ReadFileContents()
0
vks On
import re
def ReadFileContents():
    """Copy selected records from 'Testing.pdb' into 'list3.txt' using a regex.

    A line is kept when it starts with 'TER', 'HETATM', 'CONECT', 'MASTER'
    or 'END'.  Kept lines are written in their original order, one record
    per output line.

    Raises whatever ``open`` raises (e.g. when 'Testing.pdb' is missing).
    """
    # Compile once, outside the loop.  To extract further record types, add
    # them to the alternation.
    record = re.compile(r"^(?:TER|HETATM|CONECT|MASTER|END).*$")

    list3 = []
    with open('Testing.pdb') as file_content:
        for line in file_content:
            m = record.match(line)
            if m:
                # '.' does not match '\n', so group() is the line without
                # its trailing newline.
                list3.append(m.group())

    with open('list3.txt', 'w') as out:
        for line in list3:
            # The newline was dropped by the match, so add exactly one back.
            out.write(line + '\n')

You can use re to get what you want. It is more scalable too.