Linked Questions

Popular Questions

I am trying to replace dates at the start of each block of my document. I have a working PoC, but it feels sluggish. I think the problem is that I am using search even though I know that the text to be replaced is at the beginning of the block, but I can't work out how to select it directly.

from PyQt6 import QtWidgets, QtGui
import time


class MainWindow(QtWidgets.QMainWindow):
    """Demo window: a plain-text editor and a button that rewrites block prefixes.

    The editor is filled with 20000 lines that each start with ``2022``; the
    toolbar button triggers :meth:`select_text`, which swaps that leading
    text for ``2023`` and prints how long the pass took.
    """

    def __init__(self):
        super().__init__()

        # Create a plain text edit widget and add some text to it
        self.text_edit = QtWidgets.QPlainTextEdit()
        # String repetition builds the fixture in one step instead of
        # growing it with ``+=`` inside a loop.
        text = "2022 something or the other\n2022 some other test\n" * 10000
        self.text_edit.setPlainText(text)
        self.setCentralWidget(self.text_edit)

        # Create a button and connect its clicked signal to the select_text function
        self.button = QtWidgets.QPushButton("Change Text")
        self.button.clicked.connect(self.select_text)
        toolbar = self.addToolBar("Toolbar")
        toolbar.addWidget(self.button)

    def select_text(self):
        """Replace the leading ``old[i]`` of block ``i`` with ``new[i]``.

        Only text at the very start of a block is touched. The prefix is
        selected directly by position rather than searched for, so a match
        can never land in a later block (``QTextDocument.find`` keeps
        searching past the end of the block it was started in). Blocks that
        do not start with the expected text are left unchanged. The pass
        stops when either the blocks or the replacement pairs run out.
        Prints the elapsed time when done.
        """
        old = ["2022"] * 20000
        new = ["2023"] * 20000
        start_time = time.perf_counter()
        # textCursor() returns a copy, so moving it does not disturb the
        # caret the user sees in the widget.
        cursor = self.text_edit.textCursor()
        cursor.movePosition(QtGui.QTextCursor.MoveOperation.Start)
        # Group every replacement into a single undo step.
        cursor.beginEditBlock()

        for old_text, new_text in zip(old, new):
            if cursor.block().text().startswith(old_text):
                prefix_start = cursor.position()
                # NOTE: len() counts code points while Qt positions count
                # UTF-16 units; they agree for the ASCII dates used here.
                cursor.setPosition(
                    prefix_start + len(old_text),
                    QtGui.QTextCursor.MoveMode.KeepAnchor,
                )
                cursor.insertText(new_text)
            # movePosition returns False once there is no further block.
            if not cursor.movePosition(QtGui.QTextCursor.MoveOperation.NextBlock):
                break

        cursor.endEditBlock()
        end_time = time.perf_counter()
        elapsed_time = end_time - start_time
        print(f"Elapsed time: {elapsed_time:.2f} seconds")





if __name__ == "__main__":
    # The application object must exist before any widget is created.
    application = QtWidgets.QApplication([])
    main_window = MainWindow()
    main_window.show()
    # Run the Qt event loop.
    application.exec()

This is my current code. It works as required, but I hope that selecting with movePosition instead of searching will speed it up.

Edit: Based on musicamante's answer, the improved code:

    def select_text(self):
        """Replace successive matches of the ``old`` entries with the paired ``new`` entries.

        A single search cursor walks the document from its first block; each
        search resumes from the cursor returned by the previous one. The pass
        ends at the first entry that cannot be found, or once every pair has
        been used. The elapsed time is printed at the end.
        """
        old = ["2022"] * 20000
        new = ["2023"] * 20000
        started = time.perf_counter()
        edit_cursor = self.text_edit.textCursor()
        # Bracket all replacements in one edit block on the document.
        edit_cursor.beginEditBlock()
        document = self.text_edit.document()
        match = QtGui.QTextCursor(document.begin())
        for needle, replacement in zip(old, new):
            match = document.find(needle, match)
            if match.isNull():
                # Nothing further to replace.
                break
            match.insertText(replacement)
        edit_cursor.endEditBlock()
        finished = time.perf_counter()
        print("Elapsed time: {:.2f} seconds".format(finished - started))

Edit2: The background of the application is that I have timestamps with unknown time zone settings, and instead of asking for the time zone, I have added a possible UTC offset to shift the time to UTC (when/if needed). I have reworked the example and the approach:

from PyQt6 import QtWidgets, QtGui
import time
import datetime


class MainWindow(QtWidgets.QMainWindow):
    """Demo window: a plain-text editor whose blocks each start with a timestamp.

    The toolbar button triggers :meth:`shift_timezone`, which overwrites the
    timestamp at the start of every block with a shifted one.
    """

    def __init__(self):
        super().__init__()

        # Create a plain text edit widget and add some text to it
        self.text_edit = QtWidgets.QPlainTextEdit()
        text = "2022-08-02T15:41:05.000  something or the other\n2022-08-02T15:41:06.000  Some parts may contain timestamps 2021-08-02T15:42:06.000 or\u2028New lines within a block\n2022-08-02T15:42:06.000  some other test"
        self.text_edit.setPlainText(text)
        self.setCentralWidget(self.text_edit)

        # Create a button and connect its clicked signal to the shift_timezone function
        self.button = QtWidgets.QPushButton("Change Text")
        self.button.clicked.connect(self.shift_timezone)
        toolbar = self.addToolBar("Toolbar")
        toolbar.addWidget(self.button)

    def shift_timezone(self):
        """Overwrite the leading timestamp of each block with a shifted one.

        Block ``i`` receives the timestamp derived from ``text_timestamps[i]``
        plus a fixed 3600 second offset. The first 24 characters of the block
        (the 23-character timestamp and its one-character placeholder) are
        selected by position and replaced. Blocks too short to hold a
        timestamp are skipped so the selection never spills into the next
        block. The pass stops when either the blocks or the timestamps run
        out, so extra lines typed into the editor cannot cause an
        ``IndexError``. Prints the elapsed time when done.
        """
        stamp_width = 24  # characters replaced at the start of each block
        text_timestamps = [1659447665, 1659447666, 1659447726]
        start_time = time.perf_counter()
        cursor = self.text_edit.textCursor()
        cursor.movePosition(cursor.MoveOperation.Start, cursor.MoveMode.MoveAnchor)
        # Group every replacement into a single undo step.
        cursor.beginEditBlock()
        for timestamp in text_timestamps:
            # NOTE(review): fromtimestamp() without tz yields local time, yet
            # the result is suffixed with 'Z' - confirm this is intended.
            dt = datetime.datetime.fromtimestamp(timestamp + 3600)
            iso_string = dt.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'
            # QTextBlock.length() counts the block separator as well, hence
            # the strict comparison.
            if cursor.block().length() > stamp_width:
                stamp_start = cursor.position()
                # Selecting by absolute position avoids stepping the cursor
                # character by character with movePosition(Right, ...).
                cursor.setPosition(
                    stamp_start + stamp_width,
                    QtGui.QTextCursor.MoveMode.KeepAnchor,
                )
                cursor.insertText(iso_string)
            # movePosition returns False once there is no further block.
            if not cursor.movePosition(cursor.MoveOperation.NextBlock, cursor.MoveMode.MoveAnchor):
                break
        cursor.endEditBlock()
        end_time = time.perf_counter()
        elapsed_time = end_time - start_time
        print(f"Elapsed time: {elapsed_time:.2f} seconds")





if __name__ == "__main__":
    # The application object must exist before any widget is created.
    application = QtWidgets.QApplication([])
    main_window = MainWindow()
    main_window.show()
    # Run the Qt event loop.
    application.exec()

(I intentionally added a space as a placeholder for Zulu in the original display text to account for multiple runs of the function)

This works perfectly on a functional level, but the performance is atrocious (7x the runtime of just dropping the document, and creating a new one with the updated timestamps). As I do not have control of the content, it can contain the exact same timestamps that I have, and it can also have line breaks within the block.

What I do know is that all blocks will start with the timestamp. I can control the format of the timestamp, and I have the source with the timestamps and the content in a separate variable, making it possible to drop everything and rebuild the document.

Am I making some obviously performance intensive mistake here?

Related Questions