#!/usr/bin/python
from TwitterSearch import *
import sys
import csv
tso = TwitterSearchOrder() # create a TwitterSearchOrder object
tso.set_keywords(['gmo']) # let's define all words we would like to have a look for
tso.set_language('en') # we want to see English tweets only
tso.set_include_entities(False) # and don't give us all those entity information
max_range = 1 # search range in kilometres
num_results = 500 # minimum results to obtain
outfile = "output.csv"
# create twitter API object
twitter = TwitterSearch(
access_token = "764537836884242432-GzJmUSL4hcC2DOJD71TiQXwCA0aGosz",
access_token_secret = "zDGYDeigRqDkmdqTgBOltcfNcNnfLwRZPkPLlnFyY3xqQ",
consumer_key = "Kr9ThiJWvPa1uTXZoj4O0YaSG",
consumer_secret = "ozGCkXtTCyCdOcL7ZFO4PJs85IaijjEuhl6iIdZU0AdH9CCoxS"
)
# Create an array of USA states
ustates = [
"AL",
"AK",
"AS",
"AZ",
"AR",
"CA",
"CO",
"CT",
"DE",
"DC",
"FM",
"FL",
"GA",
"GU",
"HI",
"ID",
"IL",
"IN",
"IA",
"KS",
"KY",
"LA",
"ME",
"MH",
"MD",
"MA",
"MI",
"MN",
"MS",
"MO",
"MT",
"NE",
"NV",
"NH",
"NJ",
"NM",
"NY",
"NC",
"ND",
"MP",
"OH",
"OK",
"OR",
"PW",
"PA",
"PR",
"RI",
"SC",
"SD",
"TN",
"TX",
"UT",
"VT",
"VI",
"VA",
"WA",
"WV",
"WI",
"WY",
"USA"
]
def linearSearch(item, obj, start=0):
for i in range(start, len(obj)):
if item == obj[i]:
return True
return False
# open a file to write (mode "w"), and create a CSV writer object
csvfile = file(outfile, "w")
csvwriter = csv.writer(csvfile)
# add headings to our CSV file
row = [ "user", "text", "place"]
csvwriter.writerow(row)
#-----------------------------------------------------------------------
# the twitter API only allows us to query up to 100 tweets at a time.
# to search for more, we will break our search up into 10 "pages", each
# of which will include 100 matching tweets.
#-----------------------------------------------------------------------
result_count = 0
last_id = None
while result_count < num_results:
# perform a search based on latitude and longitude
# twitter API docs: https://dev.twitter.com/docs/api/1/get/search
query = twitter.search_tweets_iterable(tso)
for result in query:
state = 0
if result["place"]:
user = result["user"]["screen_name"]
text = result["text"]
text = text.encode('utf-8', 'replace')
place = result["place"]["full_name"]
state = place.split(",")[1]
if linearSearch(state,ustates):
print state
# now write this row to our CSV file
row = [ user, text, place ]
csvwriter.writerow(row)
result_count += 1
last_id = result["id"]
print "got %d results" % result_count
csvfile.close()
I am trying to categorize the tweets by my array ustates, but the second if block seems like it doesn't work. I had no idea about that. What I did was to do a linear search, if my item is equal to the item in my array, I will write it into a csv file.
as it looks like the problem is some whitespaces remaining, you can use
.strip()to remove themAlso some other tips
To speed up the membership test in
ustatesuse a set instead of a list because set have a constant time check, while list is a linear searchThe preferred way to open a file is using a context manager which ensure the closing of the file at the end of the block or in case of error in the block. Also use open instead of file
with those tip the code should look like