I am trying to retrieve audio_features for all my songs (5985) in batches of 100 songs using Spotipy. A simpler version of the code, without the exception handling, worked fine for the first 1416 items, but now it is completely blocked. I then tried to solve it by using the code below:
def exponential_backoff_retry(func, *args, max_retries=3, base_delay=1, **kwargs):
    """Call ``func(*args, **kwargs)``, retrying with exponential backoff on HTTP 429.

    Args:
        func: Callable to invoke; positional and remaining keyword arguments
            are forwarded to it unchanged.
        max_retries: Maximum number of times ``func`` is called.
        base_delay: Seconds to wait before the first retry; the wait doubles
            on every subsequent retry (``base_delay * 2 ** attempt``).

    Returns:
        The value returned by ``func``, or ``None`` when every attempt was
        rate limited (or ``max_retries`` is 0).

    Raises:
        SpotifyException: Re-raised unchanged when its ``http_status`` is
            anything other than 429.
    """
    for attempt in range(max_retries):
        try:
            return func(*args, **kwargs)
        except SpotifyException as e:
            if e.http_status != 429:
                # Not a rate-limit problem: let the caller see the original
                # exception with its traceback intact.
                raise
            if attempt == max_retries - 1:
                # No further attempt will follow, so sleeping here would only
                # delay the failure report.
                break
            delay = base_delay * 2 ** attempt
            print(f"Rate limited. Retrying in {delay} seconds.")
            time.sleep(delay)
    print("Max retries exceeded. Unable to fetch track features.")
    return None
def get_user_saved_track_features(sp, ids, start_track_id=None, batch_size=100):
    """Collect track metadata and audio features into a DataFrame.

    For every batch of IDs, the metadata of each track is requested with
    ``sp.track`` and the audio features of the whole batch with
    ``sp.audio_features``; both calls go through
    ``exponential_backoff_retry``.

    Args:
        sp: Client object providing ``track(track_id)`` and
            ``audio_features(list_of_ids)``.
        ids: Sequence of track IDs.
        start_track_id: Index into ``ids`` at which to start (``None`` or 0
            means the beginning). Despite the name it is used as a position,
            not as a track ID.
        batch_size: Number of IDs per ``audio_features`` request.

    Returns:
        A ``pandas.DataFrame`` with one row per track for which both the
        metadata and the audio features were obtained.
    """
    feature_keys = (
        'acousticness', 'danceability', 'energy', 'instrumentalness',
        'liveness', 'loudness', 'speechiness', 'tempo', 'valence',
        'time_signature', 'key', 'mode', 'uri',
    )
    columns = ['name', 'album', 'artist', 'release_date', 'length',
               'popularity', *feature_keys]
    tracks = []

    # Iterate through each batch of track IDs
    for offset in range(start_track_id or 0, len(ids), batch_size):
        batch = ids[offset:offset + batch_size]

        # Keep one metadata entry per ID (None on failure) so that each
        # track can later be paired with its own audio features instead of
        # with whatever track happened to be processed last.
        metas = []
        for track_id in batch:
            meta = exponential_backoff_retry(sp.track, track_id)
            if meta is None:
                print(f"Skipping meta for track ID {track_id} due to error")
            else:
                print(f"Processed meta for track ID {track_id}")
            metas.append(meta)
            # Throttle where the per-song request is actually made.
            time.sleep(1)  # Sleep for 1 second per song
        print("Processed all metatracks")

        batch_features = exponential_backoff_retry(sp.audio_features, batch)
        if batch_features is None:
            print("Skipping batch due to error")
        else:
            # Assumes audio_features returns one entry per requested ID, in
            # request order, with None for unavailable tracks - TODO confirm
            # against the client library in use.
            for track_id, meta, features in zip(batch, metas, batch_features):
                if meta is None:
                    print(f"Skipping track ID {track_id} because its metadata is missing")
                    continue
                if not features:
                    print(f"Skipping track ID {track_id} because at least one feature value is None")
                    continue
                print(f"Processing features {track_id}")
                album = meta['album']
                row = [
                    meta['name'],
                    album['name'],
                    album['artists'][0]['name'],
                    album['release_date'],
                    meta['duration_ms'],
                    meta['popularity'],
                ]
                row.extend(features[key] for key in feature_keys)
                tracks.append(row)
                print(f"Processed track ID audio features {track_id}")
        time.sleep(1)  # Sleep for 1 second per batch to avoid rate limiting

    # Create DataFrame from the list of track features
    return pd.DataFrame(tracks, columns=columns)
Weirdly, the requests are accepted every time for this part:
# Excerpt of the per-track metadata loop: one sp.track request per ID in the
# batch, each routed through exponential_backoff_retry.
# NOTE: the locals below are reassigned on every iteration, so after the loop
# they hold only the values of the last track in the batch.
for track_id in batch:
meta = exponential_backoff_retry(sp.track, track_id)
name = meta['name']
album = meta['album']['name']
artist = meta['album']['artists'][0]['name']
release_date = meta['album']['release_date']
length = meta['duration_ms']
popularity = meta['popularity']
print(f"Processed meta for track ID {track_id}")
But starting from `batch_features = exponential_backoff_retry(sp.audio_features, batch)`, the 'Max Retries Reached' error is caught immediately, on the very first batch and even the first feature.
I also tried changing `start_track_id`, but without success.
According to the error message that you posted, the error response (code 429) is an HTTP status code indicating that the user has sent too many requests in a given amount of time ("rate limiting"). It's typically used by web servers to prevent abuse and ensure fair usage of resources.
Basically, you can:
It is probably better to first understand the rate limits that the API enforces.
BTW: rate limiting is quite common.