Could you please help me parse the following XML? NaN is not handled properly. Previous question: How to get DF from XML using read_xml() and xpath?
import pandas as pd
from io import StringIO # StringIO
xml = '''<?xml version="1.0" encoding="UTF-8" ?>
<vector result="5">
<data vectorkey="0" type="Document">
<result><RGT_STD_DT value="20231101"/><TH1_PAY_TERM_BEGIN_DT value=""/><SHOTN_ISIN value="183190"/><ISSUCO_CUSTNO value="21505"/></result>
</data>
<data vectorkey="1" type="Document">
<result><RGT_STD_DT value="20231031"/><TH1_PAY_TERM_BEGIN_DT value=""/><SHOTN_ISIN value="448730"/><ISSUCO_CUSTNO value="48052"/></result>
</data>
<data vectorkey="2" type="Document">
<result><RGT_STD_DT value="20231031"/><TH1_PAY_TERM_BEGIN_DT value=""/><SHOTN_ISIN value="453440"/><ISSUCO_CUSTNO value="48543"/></result>
</data>
<data vectorkey="3" type="Document">
<result><RGT_STD_DT value="20231005"/><TH1_PAY_TERM_BEGIN_DT value="20231025"/><SHOTN_ISIN value="009970"/><ISSUCO_CUSTNO value="997"/></result>
</data>
<data vectorkey="4" type="Document">
<result><RGT_STD_DT value="20231004"/><TH1_PAY_TERM_BEGIN_DT value="20231018"/><SHOTN_ISIN value="078520"/><ISSUCO_CUSTNO value="9602"/></result>
</data>
</vector>'''
df = pd.read_xml(StringIO(xml), xpath='//*[local-name()="result"]/*')
print(df)
df2 = []
i = 0
while (i < len(df.index)):
tdf = df[i:i+4]
tdf = tdf.transpose()
tdf.columns = ["RGT_STD_DT", "TH1_PAY_TERM_BEGIN_DT", "SHOTN_ISIN", "ISSUCO_CUSTNO"]
df2.append(tdf)
i = i + 4
print(pd.concat(df2))
D:\Temp\python xml_to_df.py
value
0 20231101
1 183190
2 21505
3 20231031
4 448730
5 48052
6 20231031
7 453440
8 48543
9 20231005
10 20231025
11 9970
12 997
13 20231004
14 20231018
15 78520
16 9602
Traceback (most recent call last):
File "D:\Temp\xml_to_df.py", line 35, in make_dividend_from_seibro
tdf.columns = ["RGT_STD_DT", "TH1_PAY_TERM_BEGIN_DT", "SHOTN_ISIN", "ISSUCO_CUSTNO"]
^^^^^^^^^^^
...
File "C:\Users\gmde0\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\base.py", line 70, in _validate_set_axis
raise ValueError(
ValueError: Length mismatch: Expected axis has 1 elements, new values have 4 elements