I am trying to replace dates at the start of each block of my document. I have a working PoC, but it feels sluggish. I think the problem is that I am using a search even though I know that the text to be replaced is at the beginning of the block, but I can't work out how to select it directly.
from PyQt6 import QtWidgets, QtGui
import time
class MainWindow(QtWidgets.QMainWindow):
    """Demo window: a plain-text editor plus a toolbar button that rewrites its text.

    The editor is pre-filled with 20000 lines that each start with "2022";
    pressing the "Change Text" button runs :meth:`select_text`.
    """

    def __init__(self):
        """Build the editor, fill it with the sample text and add the toolbar button."""
        super().__init__()
        # Create a plain text edit widget and add some text to it
        self.text_edit = QtWidgets.QPlainTextEdit()
        # Repeating the string yields the same text as appending in a loop,
        # without building 10000 intermediate strings.
        text = "2022 something or the other\n2022 some other test\n" * 10000
        self.text_edit.setPlainText(text)
        self.setCentralWidget(self.text_edit)
        # Create a button and connect its clicked signal to the select_text function
        self.button = QtWidgets.QPushButton("Change Text")
        self.button.clicked.connect(self.select_text)
        toolbar = self.addToolBar("Toolbar")
        toolbar.addWidget(self.button)

    def select_text(self):
        """Replace every occurrence of ``old[i]`` inside block ``i`` with ``new[i]``.

        Blocks are paired with the ``old``/``new`` entries in document order;
        processing stops when either the blocks or the entries run out. All
        replacements happen between ``beginEditBlock()`` and ``endEditBlock()``,
        and the elapsed wall-clock time is printed at the end.
        """
        old = ["2022"] * 20000
        new = ["2023"] * 20000
        start_time = time.perf_counter()
        document = self.text_edit.document()
        cursor = self.text_edit.textCursor()
        cursor.beginEditBlock()
        try:
            block = document.begin()
            for old_text, new_text in zip(old, new):
                if not block.isValid():
                    break
                # find() searches to the end of the document, so a match is
                # only replaced while it still lies inside the current block;
                # otherwise the next block would be rewritten with this
                # block's replacement text.
                match = document.find(old_text, block.position())
                while not match.isNull() and match.block() == block:
                    match.insertText(new_text)
                    # Continue searching after the text that was just inserted
                    match = document.find(old_text, match)
                block = block.next()
        finally:
            # Always close the edit block, even if a replacement raised
            cursor.endEditBlock()
        end_time = time.perf_counter()
        elapsed_time = end_time - start_time
        print(f"Elapsed time: {elapsed_time:.2f} seconds")
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the demo window
    # and hand control to the event loop.
    application = QtWidgets.QApplication([])
    main_window = MainWindow()
    main_window.show()
    application.exec()
This is my current code. It works as required, but I hope that selecting with movePosition instead of searching will speed it up.
Edit: Based on musicamante's answer, the improved code:
def select_text(self):
    """Replace successive matches of ``old[i]`` with ``new[i]``, scanning the document once.

    A single search cursor walks forward from the start of the document:
    the i-th search looks for ``old[i]`` after the previous replacement and
    the match is overwritten with ``new[i]``. The scan stops as soon as a
    search finds nothing or all pairs have been used. The elapsed
    wall-clock time is printed at the end.
    """
    old = ["2022"] * 20000
    new = ["2023"] * 20000
    start_time = time.perf_counter()
    cursor = self.text_edit.textCursor()
    doc = self.text_edit.document()
    find_cursor = QtGui.QTextCursor(doc.begin())
    cursor.beginEditBlock()
    try:
        # zip() bounds the loop by the actual number of pairs instead of a
        # hard-coded count that has to be kept in sync with the lists.
        for old_text, new_text in zip(old, new):
            find_cursor = doc.find(old_text, find_cursor)
            if find_cursor.isNull():
                # No further match anywhere in the document
                break
            find_cursor.insertText(new_text)
    finally:
        # Always close the edit block, even if a replacement raised
        cursor.endEditBlock()
    end_time = time.perf_counter()
    elapsed_time = end_time - start_time
    print(f"Elapsed time: {elapsed_time:.2f} seconds")
Edit 2: In the real application I have timestamps with unknown time zone settings, and instead of asking for the time zone, I have added an optional UTC offset to shift the time to UTC (when/if needed). I have reworked the example and the approach:
from PyQt6 import QtWidgets, QtGui
import time
import datetime
class MainWindow(QtWidgets.QMainWindow):
    """Demo window: a plain-text editor whose blocks each start with a timestamp.

    Pressing the "Change Text" button runs :meth:`shift_timezone`, which
    overwrites the timestamp at the start of every block.
    """

    def __init__(self):
        """Build the editor, fill it with the sample text and add the toolbar button."""
        super().__init__()
        # Create a plain text edit widget and add some text to it
        self.text_edit = QtWidgets.QPlainTextEdit()
        text = "2022-08-02T15:41:05.000 something or the other\n2022-08-02T15:41:06.000 Some parts may contain timestamps 2021-08-02T15:42:06.000 or\u2028New lines within a block\n2022-08-02T15:42:06.000 some other test"
        self.text_edit.setPlainText(text)
        self.setCentralWidget(self.text_edit)
        # Create a button and connect its clicked signal to the shift_timezone function
        self.button = QtWidgets.QPushButton("Change Text")
        self.button.clicked.connect(self.shift_timezone)
        toolbar = self.addToolBar("Toolbar")
        toolbar.addWidget(self.button)

    def shift_timezone(self):
        """Overwrite the leading timestamp of each block with a shifted one.

        Blocks are paired with ``text_timestamps`` in document order. For each
        pair the first 24 characters of the block (fewer if the block is
        shorter) are replaced by the timestamp plus one hour, formatted with
        millisecond precision and a trailing ``Z``. Processing stops when
        either the blocks or the timestamps run out. The elapsed wall-clock
        time is printed at the end.
        """
        text_timestamps = [1659447665, 1659447666, 1659447726]
        offset_seconds = 3600
        # 23 characters of timestamp plus one placeholder character for the "Z"
        prefix_length = 24
        start_time = time.perf_counter()
        keep_anchor = QtGui.QTextCursor.MoveMode.KeepAnchor
        cursor = self.text_edit.textCursor()
        cursor.beginEditBlock()
        try:
            block = self.text_edit.document().begin()
            for timestamp in text_timestamps:
                if not block.isValid():
                    break
                # NOTE(review): fromtimestamp() without a tz argument yields the
                # machine's local time, yet the result is labelled "Z" -- confirm
                # that this is the intended conversion.
                dt = datetime.datetime.fromtimestamp(timestamp + offset_seconds)
                iso_string = dt.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'
                block_start = block.position()
                # block.length() counts the trailing block separator, so the
                # selection is clamped to the block's own text and can never
                # run into the following block.
                prefix_end = block_start + min(prefix_length, block.length() - 1)
                # Select the prefix by absolute position instead of stepping
                # the cursor character by character.
                cursor.setPosition(block_start)
                cursor.setPosition(prefix_end, keep_anchor)
                cursor.insertText(iso_string)
                block = block.next()
        finally:
            # Always close the edit block, even if a replacement raised
            cursor.endEditBlock()
        end_time = time.perf_counter()
        elapsed_time = end_time - start_time
        print(f"Elapsed time: {elapsed_time:.2f} seconds")
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the demo window
    # and hand control to the event loop.
    application = QtWidgets.QApplication([])
    main_window = MainWindow()
    main_window.show()
    application.exec()
(I intentionally added a space after each timestamp in the original display text as a placeholder for the Zulu designator "Z", so that the function can be run multiple times.)
This works perfectly on a functional level, but the performance is atrocious (7x the runtime of just dropping the document and creating a new one with the updated timestamps). As I do not have control of the content, it can contain the exact same timestamps that I have, and it can also have line breaks within a block.
What I do know is that all blocks will start with the timestamp, that I can control the format of the timestamp, and that I have the source data, with the timestamps and the content, in a separate variable, which makes it possible to drop everything and rebuild the document.
Am I making some obviously performance intensive mistake here?