Multithreading or Async file handling with Python

58 views Asked by At

I have created a script that watches a folder for new files, converts them from Word to PDF, creates a folder named after each file, and moves the files into it. The problem is that when there are many files, the conversion is slow. Here is the code. I am not sure which way to go: should I learn multithreading or async file handling for this particular problem?

import os
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from win32com import client
import pythoncom
import shutil
import asyncio
from docx2pdf import convert
"""import aspose.words as aw
"""

baseAd = r"C:\inetpub\wwwroot\utkuploads"

"""This part is for catching errors.
"""
def createText(filename, filedetail):
    """Write ``filedetail`` to ``<filename>.txt`` inside the uploads folder.

    The file is opened in write mode, so an existing file with the same
    name is overwritten.
    """
    target = r"C:\inetpub\wwwroot\utkuploads\{filename}.txt".format(filename=filename)
    with open(target, 'w') as out:
        out.write(f'{filedetail}')

"""This doc2pdf works with WORD in backend. It opens word and converts the file to pdf.
"""
def doc2pdf(doc_name, pdf_name):
    """Convert a Word document to PDF by automating Word over COM.

    A new Word instance is started for the call. Any existing file at
    ``pdf_name`` is removed first, then the document is opened read-only
    and saved with ``FileFormat=17``.

    A failure in ``SaveAs`` is recorded through ``createText`` and does not
    raise. Failures while starting Word, removing the old PDF, or opening or
    closing the document propagate to the caller.

    Args:
        doc_name: Path of the source Word document.
        pdf_name: Path the PDF should be written to.

    Returns:
        ``pdf_name``, unchanged.
    """
    pythoncom.CoInitialize()
    try:
        word = client.DispatchEx("Word.Application")
        try:
            if os.path.exists(pdf_name):
                os.remove(pdf_name)

            worddoc = word.Documents.Open(doc_name, ReadOnly=1)
            try:
                worddoc.SaveAs(pdf_name, FileFormat=17)
            except Exception as e:
                createText('saveasExceptionXX', f"{e}")
            finally:
                # Close the document even when the save failed.
                worddoc.Close()
        finally:
            # Always quit Word: without this, a failed Open leaves an
            # orphaned Word process behind for every failed file.
            word.Quit()
    finally:
        pythoncom.CoUninitialize()

    return pdf_name



"""def doc2pdfX(doc_name, pdf_name):
    #second best
    convert(doc_name, pdf_name)"""




"""def doc2pdf2zz(doc_name, pdf_name):
    #best
    doc = aw.Document(doc_name)
    doc.save(pdf_name)"""




class DocFileHandler(FileSystemEventHandler):
    """Handle newly created files in the watched uploads folder.

    ``.docx`` files are converted to PDF with ``doc2pdf`` and both files are
    moved into a sub-folder of ``baseAd`` named after the document. Files
    whose name contains ``@`` are treated as attachments and moved into the
    sub-folder named by the text between ``@`` and the first following dot.

    Conversion runs synchronously inside ``on_created``, so each event is
    fully processed before the handler returns.
    """

    def is_temporary_file(self, filename):
        """Return True when the file's base name starts with ``~$``.

        ``filename`` may be a bare name or a full path; only the last path
        component is inspected, so a leading directory cannot hide the
        ``~$`` prefix.
        """
        return os.path.basename(filename).startswith("~$")

    def createFolder(self, baseAd, folderName):
        """Ensure ``folderName`` exists under ``baseAd`` and return its path."""
        path = os.path.join(baseAd, folderName)
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(path, exist_ok=True)
        return path

    def createFolderAtt(self, folderName):
        """Ensure the directory ``folderName`` exists and return it."""
        os.makedirs(folderName, exist_ok=True)
        return folderName

    def _convert_and_file(self, doc_path, pdf_path, suffix):
        """Convert ``doc_path`` to PDF and move both files into their folder.

        The folder name is the file name up to its first dot, with ``suffix``
        removed when one is given.
        """
        doc2pdf(doc_path, pdf_path)

        folder_name = os.path.split(pdf_path)[-1].split(".")[0]
        if suffix:
            folder_name = folder_name.replace(suffix, '')
        target_dir = self.createFolder(baseAd, folder_name)

        for src in (pdf_path, doc_path):
            shutil.move(src, os.path.join(target_dir, os.path.split(src)[-1]))

    def _file_attachment(self, doc_path, file_name):
        """Move an ``@`` attachment into the folder named in its file name."""
        folder_name = file_name.split("@")[1].split(".")[0]
        target_dir = os.path.join(os.path.dirname(doc_path), folder_name)
        try:
            # Create the folder up front so a single move is enough.
            self.createFolderAtt(target_dir)
            shutil.move(doc_path, os.path.join(target_dir, file_name))
        except Exception as e:
            createText('InnerAttachmentError', f'{e}')

    def on_created(self, event):
        """Dispatch a creation event to the conversion or attachment path.

        Directories are ignored. Errors are written to text files through
        ``createText`` rather than raised.
        """
        try:
            # [0] is the containing directory, [-1] the file name with extension.
            currentFileName = os.path.split(event.src_path)
            currentFileNameSplitted = currentFileName[-1]
            if '.tmp' not in currentFileNameSplitted:
                print(f'File name {currentFileName}, splittted: {currentFileNameSplitted} has entered to server.')

            if event.is_directory:
                return

            is_created = event.event_type == 'created'
            is_temp = self.is_temporary_file(event.src_path)
            is_attachment = '@' in currentFileNameSplitted

            if (is_created and event.src_path.lower().endswith('.docx')
                    and not is_attachment and not is_temp):
                doc_path = event.src_path
                pdf_path = os.path.splitext(doc_path)[0] + '.pdf'

                # An underscore marks a template; templates are not converted.
                if '_' in doc_path:
                    print(f'New Template has been detected: {doc_path}')
                    return
                if '~$' in doc_path or '@' in doc_path or 'TEMPLATE-REPORT' in doc_path:
                    return

                # Pick the marker that is stripped from the folder name.
                if '-GENERATED-REPORT' in doc_path:
                    suffix = '-GENERATED-REPORT'
                elif '-IMZALIRAPOR' in doc_path:
                    suffix = '-IMZALIRAPOR'
                elif 'GENERATED-REPORT' not in doc_path:
                    suffix = ''
                else:
                    # 'GENERATED-REPORT' without the leading dash is not handled.
                    return

                try:
                    self._convert_and_file(doc_path, pdf_path, suffix)
                except Exception as e:
                    createText('exceptionHasOccured...', f'{e}')

            elif is_created and is_attachment and not is_temp:
                try:
                    self._file_attachment(event.src_path, currentFileNameSplitted)
                except Exception as e:
                    createText('outerAttachmentErrorOccured', f'{e}')

        except Exception as e:
            createText('outerAllExceptionasOccured', f'{e}')







if __name__ == '__main__':
    # Watch the uploads folder (top level only) until interrupted with Ctrl+C.
    directory_to_watch = r"C:\inetpub\wwwroot\utkuploads"
    event_handler = DocFileHandler()

    observer = Observer()
    observer.schedule(event_handler, path=directory_to_watch, recursive=False)
    observer.start()

    try:
        # Sleep instead of spinning: an empty `while True: pass` keeps one
        # CPU core at 100% and competes with the conversion work.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
0

There are 0 answers