I am trying to check if a row already exists. If it doesn't, something has to be written in the row. My CSV file is always empty.
# import libraries
import csv
import urllib2
from bs4 import BeautifulSoup
# integer for first article id
articleid = 4449
articles = 4459
while articleid < articles:
# specify the url and article id
url = 'http://www.bkfrem.dk/default.asp?vis=nyheder&id='+str(articleid)
articleid += 1
# query the website and return the html to the variable
page = urllib2.urlopen(url)
# parse the html using beautiful soup and store in variable soup
soup = BeautifulSoup(page, 'html.parser')
# create CSV file
csvfile = csv.writer(open('news.csv', 'a'))
# take out the <div> of name and get its value and text
title_box = soup.find('h1', attrs={'style': 'margin-bottom:0px'})
title = title_box.text.encode('utf-8').strip()
date_box = soup.find('div', attrs={'style': 'font-style:italic; padding-bottom:10px'})
date = date_box.text.encode('utf-8').strip()
articleText_box = soup.find('div', attrs={'class': 'news'})
articleText = articleText_box.text.encode('utf-8').strip()
# print the data (encoded) to the CSV file
with open('news.csv', 'rb') as csvfileO:
f_reader = csv.reader(csvfileO, delimiter=',')
for row in f_reader:
if articleText not in row:
csvfile.writerow(["Title", "Date", "Text"])
csvfile.writerow((title, date, articleText))
What am I doing wrong since it's empty?
You have
if articleText not in
Not in what? You should have it pointing to something to validate.