I'm trying to pull tweets by keyword and collect information about the users so that I can analyze the texts they posted. The following is my code:
def get_tweets(self, keywords, num_tweets):
    # Form a search query using all keywords
    query = " OR ".join(keywords) + " -is:retweet lang:en"
    # Define the fields to be returned in each Tweet object
    tweet_fields = ['id', 'text', 'created_at', 'lang', 'public_metrics', 'context_annotations', 'author_id']
    user_fields = ['username', 'description', 'location', 'verified']
    tweets = tweepy.Paginator(self.client.search_recent_tweets,
                              query=query,
                              expansions=['author_id'],
                              tweet_fields=tweet_fields,
                              user_fields=user_fields,
                              max_results=100).flatten(limit=num_tweets)
    tweet_data = []
    for tweet in tweets:
        if 'includes' in tweet.data and 'users' in tweet.includes:
            users = {u["id"]: u for u in tweet.includes.get('users', [])}
            user = users.get(tweet.data['author_id'], {})
        else:
            user = {}
        tweet_data.append({
            'id': tweet.data['id'],
            'username': user.get('username', ''),
            'text': tweet.data['text'],
            'created_at': tweet.data['created_at'],
            'lang': tweet.data['lang'],
            'public_metrics': tweet.data.get('public_metrics', ''),
            'context_annotations': tweet.data.get('context_annotations', ''),
            'description': user.get('description', ''),
            'location': user.get('location', ''),
            'verified': user.get('verified', ''),
        })
    # Convert the list of tweet data into a DataFrame
    df = pd.DataFrame(tweet_data)
    df.to_csv('tweetsf1.csv', index=False)
This is how I call the method to search by key terms:
# Pass a list of keywords to the function; TwitterClient is a class that includes the token info
client = TwitterClient()
client.get_tweets(["test", "test app", "test example"], 1000)
For some reason, all of the user fields are empty: username, description, location, and verified. This doesn't happen if I use client.get_tweet, but that approach requires far too many requests: the limit is 60 requests per 15 minutes, so pulling 1000 tweets would take about 4 hours. I can only afford the Basic plan.
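For reference, this is roughly what the per-tweet fallback looks like (a rough sketch; the bearer token and tweet ID below are placeholders). Here the user fields come back populated, but every tweet costs its own request:

import tweepy

# Placeholders: a real bearer token and a tweet ID taken from the search results
client = tweepy.Client(bearer_token="...")
tweet_id = 1234567890123456789

# One request per tweet; the Response carries the expanded objects in .includes
response = client.get_tweet(
    tweet_id,
    expansions=['author_id'],
    tweet_fields=['created_at', 'lang', 'public_metrics', 'context_annotations'],
    user_fields=['username', 'description', 'location', 'verified'],
)
tweet = response.data
users = response.includes.get('users', [])
user = users[0] if users else None  # the expanded author object is populated here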