A friend and I created the following script utilizing BeautifulSoup to get the HTML of a job page, then append the job to an array, then a file, then email the job in a human-readable format to ourselves. The script works on Ubuntu, but on my Raspberry Pi, which uses Raspbian, it doesn't work.
The only message I see when running from the terminal is: 'end of file' and 'Start write...' which are lines in the code. There are no error messages when running from the Pi, but nothing gets appended to the array and no emails are sent.
Can someone take a look? Thanks.
import urllib2, email, smtplib, os.path
import cPickle as pickle
from bs4 import BeautifulSoup
class Job:
"""docstring for Job"""
def __init__(self, title, date, url):
self.title = title
self.date = date
self.url = "http://www.forensicfocus.com/"+url
def describJob(self):
return (self.title +" "+ self.date +" "+ self.url)
def createJobsArray():
soup = BeautifulSoup(urllib2.urlopen('http://www.forensicfocus.com/jobs').read())
bigFatString = soup.find_all('a')
#print(bigFatString) #this gets webpage as html. No issues here
findAll = soup.find_all("tr", class_="topic")
jobsArray = []
for section in findAll:
title = section.find("a", class_="topictitle").get_text()
titleEncoded = title.encode('ascii','ignore')
row = section.find_all("td")
date = row[3].find("div").get_text()
url = section.find_all("a")[3].get("href")
job = Job(titleEncoded, date, url)
print "printing job"
print job
print "printing job"
jobsArray.append(job)
return jobsArray
def sendEmail(job):
senderEmail = "[email protected]"
recipients = ["[email protected]"]
s = smtplib.SMTP("smtp.gmail.com",587)
s.ehlo()
s.starttls()
s.ehlo()
s.login(senderEmail, 'pass_goes_here')
for job in jobsFilteredByLocation:
msg = email.message_from_string(job.describJob())
msg['Subject'] = "New Job Found: " + job.title
s.sendmail(senderEmail, recipients, msg.as_string())
print "Sending email..."
s.quit()
def saveJobsToDisk(jobs):
with open('hadooken', 'wb') as output:
print "Start write..."
for job in jobs:
print job.title
pickle.dump(job, output)
output.close()
def getJobsFromDisk():
oldJobsArray = []
with open('hadooken', 'rb') as input:
while True:
try:
job = pickle.load(input)
print job.title, "was successfully read from file"
oldJobsArray.append(job)
except EOFError:
print "end of file"
break
return oldJobsArray
input.close()
# SCRIPT STARTS HERE
with open('hadooken', 'ab') as input:
input.close()
locationsArray = ["London"]
jobsArray = createJobsArray()
oldJobsArray = getJobsFromDisk()
jobsFilteredByLocation = []
for job in jobsArray:
for location in locationsArray:
found = job.title.find(location)
if found > 0:
if len(oldJobsArray) > 0:
if any(oldJob.title == job.title for oldJob in oldJobsArray):
print "Job previously found and sent..."
else:
print "adding ", job.title, "to array because it isnt in the old array"
jobsFilteredByLocation.append(job)
else:
print "adding ", job.title, "to array"
jobsFilteredByLocation.append(job)
sendEmail(jobsFilteredByLocation)
mergedArray = oldJobsArray + jobsFilteredByLocation
for job in mergedArray:
print "Job title: ", job.title
saveJobsToDisk(mergedArray)