I have a text file that contains accented characters such as 'č', 'š' and 'ž'. When I read this file with a Python program and put its content into a Python list, the accented characters are lost: Python replaces them with other characters. For example, 'č' is replaced by '_'. Does anyone know how I can keep the accented characters when I read them from a file in a Python program? My code:
import io  # encoding-aware open(); the built-in open() on Python 2 yields undecoded bytes
import sqlite3  # to work with relational DB

# Codec used to decode the CSV file into text. Without an explicit codec the
# accented characters ('č', 'š', 'ž', ...) are never decoded properly.
# NOTE(review): assumed value -- it must match the encoding the file was really
# saved in (other likely candidates: "utf-8", "cp1250"); confirm against the file.
CSV_ENCODING = "utf_16_le"


def _parse_contact(line):
    """Extract (name, surname, mail) from one line of the contacts file.

    The line is split at every '"' character; the name is read from token 1
    and the surname from token 5. A single stray ',' at either end of a token
    is dropped. If the surname token still contains a ',', the part before
    the first ',' is the surname and the part after it is the mail address.

    Returns None when token 1 is exactly ',' (no first name on this line).
    Raises IndexError when the line yields fewer than six tokens.
    """
    tokens = line.split('"')

    name = tokens[1]
    if name == ',':  # no first name: the caller skips this line
        return None
    # endswith()/startswith() instead of indexing so an empty token is harmless
    if name.endswith(','):
        name = name[:-1]

    surname = ''
    mail = ''
    if tokens[5] != ',':  # there is a last name in the contact data
        surname = tokens[5]
        if surname.startswith(','):
            surname = surname[1:]
        if surname.endswith(','):
            surname = surname[:-1]
        if ',' in surname:  # surname and mail are merged in the same token
            pieces = surname.split(',')
            surname = pieces[0]
            mail = pieces[1]
    return name, surname, mail


conn = sqlite3.connect('contacts.sqlite')  # connect to db
cur = conn.cursor()  # db connection handle
cur.execute("DROP TABLE IF EXISTS contacts")
cur.execute("CREATE TABLE contacts (id INTEGER, name TEXT, surname TEXT, email TEXT)")

fname = "acos_ibm_notes_contacts - test.csv"

# --------------------------------------------------
# First: build a list of (name, surname, mail) tuples from the file.
new_contact_list = []
# 'with' closes the file even if a malformed line raises while parsing
with io.open(fname, encoding=CSV_ENCODING) as fh:
    print(" ")
    print("Reading " + fname)
    print(" ")
    for line in fh:
        contact = _parse_contact(line)
        if contact is not None:
            new_contact_list.append(contact)

# --------------------------------------------------
# Second: populate the DB with data from new_contact_list; ids start at 1.
for contact_id, (name, surname, mail) in enumerate(new_contact_list, 1):
    cur.execute("INSERT INTO contacts VALUES (?, ?, ?, ?)", (contact_id, name, surname, mail))
conn.commit()  # commit outstanding changes to disk
conn.close()
-----------------------------------
This is a simplified version of the program with no DB; it just prints to the screen:
import io

# Debug version of the contact parser: reads the file and prints every
# intermediate value instead of writing to a database.

# One entry per contact, each of the form
# [{'Name': ...}, {'Surname': ...}, {'Mail': ...}]
new_contact_list = []

# 'with' closes the file even if a malformed line raises while parsing
with io.open("notes_contacts.csv", encoding="utf_16_le") as fh:
    for line in fh:
        print("Line from file:\n" + line)  # print it for debugging purposes

        # split line into tokens at each '"' character
        tokens = line.split('"')

        name = tokens[1]
        if name == ',':  # no first name: move to next line
            continue
        print("Name in variable: " + name)  # print it for debugging purposes
        # endswith()/startswith() instead of indexing so an empty token is harmless
        if name.endswith(','):
            name = name[:-1]

        surname = ''
        mail = ''
        if tokens[5] != ',':  # there is a last name in the contact data
            surname = tokens[5]
            print("Surname in variable: " + surname)  # print it for debugging purposes
            if surname.startswith(','):
                surname = surname[1:]
            if surname.endswith(','):
                surname = surname[:-1]
            if ',' in surname:  # surname and mail are merged in the same token
                pieces = surname.split(',')
                surname = pieces[0]
                mail = pieces[1]

        new_contact = [{'Name': name}, {'Surname': surname}, {'Mail': mail}]
        new_contact_list.append(new_contact)
        print("New contact within the list: " + str(new_contact))  # print it for debugging purposes
And this is the content of the file notes_contacts.csv; it has only one line:
Aco,"",Vidovič,[email protected],+38613208872,"",+38640456872,"","","","","","","","",""
Try putting
# coding=utf-8
on the first line of the program.