Linked Questions

Popular Questions

Data usage of tweepy streams

Asked by At

How much data would a typical tweepy stream use?

I am asking because I ran it on an AWS machine for 12 hours and got charged $1,100.

The charges were under the category "AWS Kinesis"

Does tweepy send a request every millisecond or something?

How would I change it so that it does not cost such an exorbitant amount of money? I am not sure I can even pay the fee...

Here is my code for reference

class MyStreamListener(tweepy.StreamListener):
    """Stream listener that stores each original tweet and per-URL-type stats.

    For every non-retweet status it writes one item to ``db`` and then
    updates one ``countdb`` item per resolved URL.  ``db`` and ``countdb``
    are module-level objects defined outside this class; they are used
    through ``put_item`` / ``update_item`` (presumably boto3 DynamoDB
    ``Table`` resources -- confirm against the surrounding file).
    """

    # Upper bound, in seconds, on each URL-resolution request so that a
    # single unresponsive host cannot stall the stream indefinitely.
    URL_TIMEOUT = 10

    def _unshorten(self, url):
        """Return the URL reached after redirects, or *url* itself on failure."""
        try:
            return requests.get(str(url), timeout=self.URL_TIMEOUT).url
        except requests.RequestException as exc:
            # One dead link must not drop the tweet or tear down the stream.
            # An unresolved t.co link is filtered out by the caller anyway.
            print('could not resolve', url, exc)
            return url

    def handle_db_update(self, tweet):
        """Persist *tweet* and update the per-URL-type counters.

        Args:
            tweet: a tweepy status object; ``id``, ``created_at``, ``text``,
                ``user.screen_name`` and ``_json`` are read from it.
        """
        embed_urls = get_embed_html(
            'https://www.twitter.com/abc/status/' + str(tweet.id))
        my_json = json.dumps(tweet._json)
        botscore = bot_score(tweet.user.screen_name)
        print('username', tweet.user.screen_name, 'bot score', botscore)

        found_urls = re.findall(r'(https?://[^\s]+)', tweet.text)
        unshortened = [self._unshorten(url) for url in found_urls]
        # Drop links that still point at the t.co shortener.
        unshortened = [url for url in unshortened if "https://t.co" not in url]
        # Classify each URL once; the result feeds both tables below.
        url_types = [url_type(url) for url in unshortened]

        # Only build the score set when there is a URL to attach it to.
        score_set = {decimal.Decimal(str(botscore))} if unshortened else set()
        url_set = set(unshortened)

        db.put_item(
            Item={
                'id': tweet.id,
                'date': str(tweet.created_at),
                'json': my_json,
                'html_embed': embed_urls,
                'urls': str(unshortened),
                'url_types': str(url_types),
            })

        # One combined write per URL instead of three separate round-trips:
        # bump the counter and extend both sets in a single update.
        # NOTE(review): every type receives the full set of this tweet's
        # URLs, not only the URLs of that type -- confirm this is intended.
        for typee in url_types:
            countdb.update_item(
                Key={'type': typee},
                UpdateExpression=(
                    "SET amount = amount + :one "
                    "ADD URLs :url, BotScores :score"
                ),
                ExpressionAttributeValues={
                    ':one': decimal.Decimal(1),
                    ':url': url_set,
                    ':score': score_set,
                },
            )

    def on_status(self, status):
        """Print and persist *status* unless it is a retweet."""
        if not status.retweeted and 'RT @' not in status.text:
            print(status.text)
            self.handle_db_update(status)

    def on_error(self, error):
        """Return False for status code 420; otherwise return None.

        Per the tweepy streaming documentation, returning False asks the
        stream to disconnect (420 is the rate-limit status code).
        """
        if error == 420:
            return False


myListener = MyStreamListener()

# Seconds to wait before every reconnection attempt.
RECONNECT_DELAY = 15

# Phrases passed to the streaming filter.
TRACK_TERMS = [
    'vaccinations autism',
    'vaccination autism',
    'vaccines autism',
    'vaccine autism',
    'measles-mumps-rubella autism',
    'MMR autism',
    'mmr autism',
    'vaccinations autistic disorder',
    'vaccination autistic disorder',
    'vaccines autistic disorder',
    'vaccine autistic disorder',
    'measles-mumps-rubella autistic disorder',
    'MMR autistic disorder',
    'mmr autistic disorder',
]

# Keep the stream alive: reconnect whenever filter() exits, for any reason.
while True:
    try:
        myStream = tweepy.Stream(auth=api.auth, listener=myListener)
        myStream.filter(track=TRACK_TERMS)
    except Exception:
        print(traceback.format_exc())
    # Pause on EVERY exit, not only after an exception.  filter() also
    # returns normally when the listener's on_error() returns False (the
    # 420 rate-limit case); reconnecting at once would spin in a tight
    # loop against an API that has just asked the client to back off.
    time.sleep(RECONNECT_DELAY)

Related Questions