Much like this related question, I found myself using QScintilla to create a syntax highlighter that has to deal with non-ASCII characters (é, ä, ß, etc...). I use the trick described in the comments of that question to solve the problem, styling the characters base on the length of the utf-8 bytes rather than the Latin-1 bytes. When styling the entire document, it works fine.
However, my issue arises when using the start/end parameters to only style part of the document as there seems to be a mismatch between the start/end parameters and the actual length of the text being styled. I need to use this as I am dealing with large files that cause a 1-2 second input delay if I continuously style the entire document.
I have the following very simple example:
é
; Comment
When I open the file, which runs the highlighter from start to finish, it looks like that:
However, if I remove and re-type the comment, the colouring will always be one letter off.
This effect stacks indefinitely, with every non-ASCII character, the colouring goes off by another letter until it is a mess.
I have provided a minimal reproducible example below. All you have to do to notice the problem is start a comment below the last line, you will notice that your styling is always one off.
import sys
from PyQt6.QtWidgets import *
from PyQt6.QtCore import *
from PyQt6.QtGui import *
from PyQt6.Qsci import *
import re
class MyLexer(QsciLexerCustom):
def __init__(self, parent):
super(MyLexer, self).__init__(parent)
self.setDefaultColor(QColor("#ff000000"))
self.setDefaultPaper(QColor("#ffffffff"))
self.setDefaultFont(QFont("Consolas", 14))
self.setColor(QColor("#ff000000"), 0) # Style 0: black
self.setColor(QColor("#ff007f00"), 3) # Style 3: green
self.setPaper(QColor("#ffffffff"), 0) # Style 0: white
self.setPaper(QColor("#ffffffff"), 3) # Style 3: white
self.setFont(QFont("Consolas", 14, weight=QFont.Weight.Bold), 0) # Style 0: Consolas 14pt
self.setFont(QFont("Consolas", 14, weight=QFont.Weight.Bold), 3) # Style 3: Consolas 14pt
def language(self):
return "SimpleLanguage"
def description(self, style):
return str(style)
def styleText(self, start, end):
self.startStyling(start)
text = self.parent().text()[start:end]
p = re.compile(r"[*]\/|\/[*]|\s+|\w+|\W")
token_list = [ (token, len(bytearray(token, "utf-8"))) for token in p.findall(text)]
editor = self.parent()
apply_until_linebreak = None
if start > 0:
previous_style_nr = editor.SendScintilla(editor.SCI_GETSTYLEAT, start - 1)
if previous_style_nr in [2, 3]:
apply_until_linebreak = previous_style_nr
for i, token in enumerate(token_list):
if apply_until_linebreak is not None:
if "\n" in token[0]:
apply_until_linebreak = None
self.setStyling(token[1], 0)
else:
self.setStyling(token[1], apply_until_linebreak)
else:
if token[0] in ["/", ";"]:
apply_until_linebreak = 3
self.setStyling(token[1], 3)
else:
self.setStyling(token[1], 0)
myCodeSample = r"""
This is white
// This is a green comment
This is white again
// This comment has a (ä) special character
"""
class CustomMainWindow(QMainWindow):
def __init__(self):
super(CustomMainWindow, self).__init__()
self.setGeometry(300, 300, 800, 400)
self.setWindowTitle("QScintilla Test")
self.__frm = QFrame(self)
self.__frm.setStyleSheet("QWidget { background-color: #ffeaeaea }")
self.__lyt = QVBoxLayout()
self.__frm.setLayout(self.__lyt)
self.setCentralWidget(self.__frm)
self.__myFont = QFont()
self.__myFont.setPointSize(14)
self.__editor = QsciScintilla()
self.__editor.setText(myCodeSample)
self.__editor.setLexer(None) # We install lexer later
self.__editor.setUtf8(True) # Set encoding to UTF-8
self.__editor.setFont(self.__myFont) # Gets overridden by lexer later on
self.__lexer = MyLexer(self.__editor)
self.__editor.setLexer(self.__lexer)
self.__lyt.addWidget(self.__editor)
self.show()
if __name__ == '__main__':
app = QApplication(sys.argv)
QApplication.setStyle(QStyleFactory.create('Fusion'))
myGUI = CustomMainWindow()
sys.exit(app.exec())