How to add "OQ" tag with read quality score as additional field in the bam file using pysam

633 views Asked by At

I need to add read quality with OQ tag as an additional field to the bam file using pysam.

Other conventional ways using samtools etc consume more time and create multiple files.

I tried with the given below script but end up unsigned character instead of the quality score string! Any help appreciated.

Thanks in advance.

eg. input bam:

E00577:205:HVF37CCXY:3:2224:32461:53258 99      chr1    10419997        60      151M    =       10420034        188     GGCAGTGGCTTCCGCGTGCCCCGTGTGCTGGTGCGGTTCCCATCACGCAGACAGGAAGGGTGTTTGCGCACTCTGATCAACTGGAACCTCTGTATCANGCGGCTGAATTCCCTTTTTCCTTNACTCNATAAAAGCTACATCAGACTGATGN      AAFFFJJJJJJJJJJJJJJJJJJJJJJJJJJJ<F<J-FJJJJJJJJJJJJJJFJJF<JJJFJ<JJJJJJ<JJFJJJJJJJJJJJJJJJJJJJFJJJJ#JJJJJAJJFJJJJJJJJJJJAFJ#FJJJ#JJJJJJJJFJJAJFAF<JJJAJF# MD:Z:97T23T4A23C0       PG:Z:MarkDuplicates  RG:Z:HVF37CCXY.3        NM:i:4  AS:i:144        XS:i:107

expected output bam:

E00577:205:HVF37CCXY:3:2224:32461:53258 99      chr1    10419997        60      151M    =       10420034        188     GGCAGTGGCTTCCGCGTGCCCCGTGTGCTGGTGCGGTTCCCATCACGCAGACAGGAAGGGTGTTTGCGCACTCTGATCAACTGGAACCTCTGTATCANGCGGCTGAATTCCCTTTTTCCTTNACTCNATAAAAGCTACATCAGACTGATGN      AAFFFJJJJJJJJJJJJJJJJJJJJJJJJJJJ<F<J-FJJJJJJJJJJJJJJFJJF<JJJFJ<JJJJJJ<JJFJJJJJJJJJJJJJJJJJJJFJJJJ#JJJJJAJJFJJJJJJJJJJJAFJ#FJJJ#JJJJJJJJFJJAJFAF<JJJAJF# MD:Z:97T23T4A23C0       PG:Z:MarkDuplicates  RG:Z:HVF37CCXY.3        NM:i:4  AS:i:144        XS:i:107        OQ:Z:AAFFFJJJJJJJJJJJJJJJJJJJJJJJJJJJ<F<J-FJJJJJJJJJJJJJJFJJF<JJJFJ<JJJJJJ<JJFJJJJJJJJJJJJJJJJJJJFJJJJ#JJJJJAJJFJJJJJJJJJJJAFJ#FJJJ#JJJJJJJJFJJAJFAF<JJJAJF#

pysam

import os
import argparse
import pysam
parser = argparse.ArgumentParser(description = 'Generate BAM with OQ tag')
parser.add_argument('-i', '--input', required=True, help='Input mark dup BAM File')
parser.add_argument('-o', '--output', required=True, help='Output BAM file with OQ tag')

args = parser.parse_args()
infile_path = os.path.abspath(args.input)
outfile_path = os.path.abspath(args.output)

infile = pysam.AlignmentFile(infile_path, "rb")
outfile = pysam.AlignmentFile(outfile_path, "wb", template=infile)
iter = infile.fetch(until_eof=True)
for read in iter:
    read.set_tag("OQ", read.query_qualities, replace=False)
    outfile.write(read)
infile.close()
outfile.close()

python GenerateBamWithOQTag.py -i subset.bam -o subset_OQ.bam

E00577:205:HVF37CCXY:3:2224:32461:53258 99      chr1    10419997        60      151M    =       10420034        188     GGCAGTGGCTTCCGCGTGCCCCGTGTGCTGGTGCGGTTCCCATCACGCAGACAGGAAGGGTGTTTGCGCACTCTGATCAACTGGAACCTCTGTATCANGCGGCTGAATTCCCTTTTTCCTTNACTCNATAAAAGCTACATCAGACTGATGN      AAFFFJJJJJJJJJJJJJJJJJJJJJJJJJJJ<F<J-FJJJJJJJJJJJJJJFJJF<JJJFJ<JJJJJJ<JJFJJJJJJJJJJJJJJJJJJJFJJJJ#JJJJJAJJFJJJJJJJJJJJAFJ#FJJJ#JJJJJJJJFJJAJFAF<JJJAJF# MD:Z:97T23T4A23C0       PG:Z:MarkDuplicates  RG:Z:HVF37CCXY.3        NM:i:4  AS:i:144        XS:i:107        OQ:B:C,32,32,37,37,37,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,27,37,27,41,12,37,41,41,41,41,41,41,41,41,41,41,41,41,41,41,37,41,41,37,27,41,41,41,37,41,27,41,41,41,41,41,41,27,41,41,37,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,37,41,41,41,41,2,41,41,41,41,41,32,41,41,37,41,41,41,41,41,41,41,41,41,41,41,32,37,41,2,37,41,41,41,2,41,41,41,41,41,41,41,41,37,41,41,32,41,37,32,37,27,41,41,41,32,41,37,2
1

There are 1 answers

0
user3214212 On
"""
USAGE:-

python Generate_bam_with_OQtag.py -i <input_file>  -o <output_file>

"""

import os
import argparse
import pysam

parser = argparse.ArgumentParser(description = 'Generate BAM with OQ tag')
parser.add_argument('-i', '--input', required=True, help='Input mark dup BAM File')
parser.add_argument('-o', '--output', required=True, help='Output BAM file with OQ tag')

args = parser.parse_args()
infile_path = os.path.abspath(args.input)
outfile_path = os.path.abspath(args.output)

infile = pysam.AlignmentFile(infile_path, "rb")
outfile = pysam.AlignmentFile(outfile_path, "wb", template=infile)
iter = infile.fetch(until_eof=True)
for read in iter:
    read.set_tag('OQ', pysam.qualities_to_qualitystring(read.query_qualities), replace=False)
    outfile.write(read)
infile.close()
outfile.close()