I am using python-docx to split a .docx file, but there is an issue. The original document contains an image. After the split, the image is in its place and has the required shape, but it is displayed with the message "Can't display drawing". I could not solve this using inline shapes. Here is the code:
# Clark-notation prefix of every ``r:*`` attribute (r:embed, r:link, r:id, ...).
# The values of these attributes are relationship ids (rIds).
_REL_ATTR_PREFIX = '{http://schemas.openxmlformats.org/officeDocument/2006/relationships}'


def _relink_relationships(el, source_part, target_part):
    """Re-point every relationship id inside *el* from source_part to target_part.

    An rId is only meaningful inside the part that declares it. When a body
    element is moved to another document its ``r:*`` attributes still carry the
    ids of the source document, so the new document cannot resolve them (Word
    then shows "Can't display drawing" for pictures). For each such attribute
    the relationship is looked up in ``source_part.rels``, an equivalent
    relationship is created (or reused) on ``target_part`` and the attribute is
    rewritten with the id returned by ``relate_to``.

    Ids that are not present in ``source_part.rels`` are left untouched.
    """
    # iter('*') restricts the walk to real elements (no comments / PIs).
    for node in el.iter('*'):
        for attr, r_id in list(node.attrib.items()):
            if not attr.startswith(_REL_ATTR_PREFIX):
                continue
            rel = source_part.rels.get(r_id)
            if rel is None:
                continue
            if rel.is_external:
                # External targets (e.g. linked images, hyperlinks) are URLs,
                # not parts, so they are re-created from the target reference.
                new_r_id = target_part.relate_to(
                    rel.target_ref, rel.reltype, is_external=True
                )
            else:
                new_r_id = target_part.relate_to(rel.target_part, rel.reltype)
            node.set(attr, new_r_id)


# NOTE(review): the nesting below was reconstructed from a paste that had lost
# its indentation -- confirm it against the real script before applying.
for element in doc.element.body:
    if isinstance(element, CT_Tbl):
        table = Table(element, doc)
        full_text = ''
        for j, row in enumerate(table.rows):
            if j == 0 and article == 39:
                for k, cell in enumerate(row.cells):
                    if k == 0:
                        # A non-empty dir_name means a previous section has been
                        # collected: write it out before starting the next one.
                        if dir_name:
                            if not doc_name:
                                doc_name = 'error'
                            doc_name = f'{ep_num}_' + re.sub(r'"', '', doc_name)
                            doc_name_unique = doc_name
                            pv_dir_path = f'{final_directory}/{company}/{dir_name}/{doc_name_unique}'
                            if not os.path.exists(pv_dir_path):
                                os.makedirs(pv_dir_path)
                            # Pick a file name that does not exist yet: "name (1)", "name (2)", ...
                            num = 1
                            while os.path.exists(f'{pv_dir_path}/{doc_name_unique}.docx'):
                                doc_name_unique = f'{doc_name} ({num})'
                                num += 1
                            for el in elements_to_save:
                                # Carry images and other related parts over to
                                # doc2 before the element leaves the source tree.
                                _relink_relationships(el, doc.part, doc2.part)
                                doc2.element.body.append(el)
                            elements_to_save = []
                            doc2.save(f'{pv_dir_path}/{doc_name_unique}.docx')
                            proxy_path = proxy_names.get(company)
                            if proxy_path:
                                # Place the company's proxy file next to the saved document.
                                proxy_filename = re.split(r'[/\\]', proxy_path)[-1]
                                copy(proxy_path, f'{pv_dir_path}\\{proxy_filename}')
                            # Start a fresh output document for the next section.
                            # NOTE(review): assumed to belong to the `if dir_name:`
                            # branch together with the save above -- confirm.
                            doc2 = docx.Document()
                            doc_name = ''
                            i += 1
                            print(i)
                    if k == 1:
                        if process_type == 'pv_dvs':
                            # Text preceding the line that starts with "Адреса".
                            pvs = re.findall(r'.+?(?=\nАдреса)', cell.text)
                            if not pvs:
                                raise Exception('ПВ/ДВС не найдено')
                            dir_name = re.sub(r'\s+', ' ', pvs[0]).strip()
                            print('ПВ/ДВС:', dir_name)
                        else:
                            dir_name = re.split(r'\n', cell.text)[0]
                            print('СУД:', dir_name)
                        # Map each distinct name to a sequential number and use
                        # that number as the directory name.
                        if dir_name not in dir_names['names']:
                            dir_names['names'].append(dir_name)
                            dir_num = max([0] + dir_names['number']) + 1
                            dir_names['number'].append(dir_num)
                        else:
                            dir_num = dir_names['number'][dir_names['names'].index(dir_name)]
                        dir_name = f'{dir_num}'
            if j == 1 and article == 39:
                for k, cell in enumerate(row.cells):
                    if k == 1:
                        # The company name is the last quoted fragment of the first line.
                        company_names = re.split(r'["«»]', re.split(r'\n', cell.text)[0])
                        if len(company_names) < 3:
                            print('>>>>', re.split(r'\n', cell.text)[1])
                            raise Exception('fuck')
                        company = company_names[-2]
            if j == 3 and process_type == 'pv_dvs' and article == 39:
                for k, cell in enumerate(row.cells):
                    if k == 1:
                        cell_lines = re.split(r'\n', cell.text)
                        if len(cell_lines) < 3:
                            raise Exception(f'Боржник не найден в {cell.text}')
                        doc_name = cell_lines[0]
                        # First run of digits on the third line, if any.
                        ep_nums = re.findall(r'\d+', cell_lines[2])
                        ep_num = int(ep_nums[0]) if ep_nums else ''
                        print('Боржник:', doc_name, 'ВП:', ep_num)
            if article == 37:
                # For article 37 all values are read from the second cell of the row.
                cell = row.cells[1]
                if j == 1:
                    pvs = re.sub(r'\s+', ' ', cell.text)
                    if not pvs:
                        raise Exception('ПВ/ДВС не найдено')
                    dir_name = re.sub(r'\s+', ' ', cell.text).strip()
                    print('ПВ/ДВС:', dir_name)
                if j == 3:
                    company_names = re.split(r'["«»]', re.split(r'\n', cell.text)[0])
                    if len(company_names) < 3:
                        print('>>>>', re.split(r'\n', cell.text)[1])
                        raise Exception('fuck')
                    company = company_names[-2]
                if j == 6:
                    doc_name = re.sub(r'\s+', ' ', cell.text).strip()
                    print('Боржник:', doc_name)
    if isinstance(element, CT_P):
        full_text = Paragraph(element, doc).text
        case_texts = re.findall(r'за\s*№\s*([\d/цнп-]+)\s*про\sстягнення', full_text)
        if case_texts:
            case = case_texts[0]
        if re.findall(r'виконавче\s*провадження\s*№\s*\d+', full_text):
            ep_num = re.findall(r'виконавче\s*провадження\s*№\s*(\d+)', full_text)[0]
            print('ВП:', ep_num)
    # Every body element is queued for the output document that is written
    # when the next section header table is reached.
    # NOTE(review): assumed to run for all element types, not only paragraphs -- confirm.
    elements_to_save.append(element)
I tried using inline shapes, but that did not work as expected.