I know two methods to extract CloudWatch logs, but neither covers both retrieving the complete log set and parsing it:
- Method 1 (limitation: I can't retrieve all the logs, since results are capped at 10,000 and the API doesn't support pagination):
# CloudWatch Logs Insights query: parse "[ts] - type - message" lines
# into three fields. (No interpolation needed, so no f-string.)
query_string = "parse @message \"[*] - * - *\" as eventTS, eventType, eventMessage"

# Initialize the CloudWatch Logs client.
client = boto3.client(
service_name='logs',
region_name=aws_region,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key
)

# Start the Logs Insights query.
start_query_response = client.start_query(
logGroupName=log_group_name,
startTime=start_time,
endTime=end_time,
queryString=query_string,
)
# Query handle used to poll for results.
query_id = start_query_response['queryId']

# Poll until the query finishes. Possible statuses include Scheduled,
# Running, Complete, Failed, Cancelled, Timeout, Unknown — the original
# code crashed on 'Scheduled' (common right after start_query) and used a
# bare `raise` with no active exception.
while True:
    response = client.get_query_results(queryId=query_id)
    status = response['status']
    if status in ('Scheduled', 'Running'):
        time.sleep(5)
    elif status == 'Complete':
        break
    else:
        # Failed / Cancelled / Timeout / Unknown: surface the real status.
        raise RuntimeError(f"CloudWatch Logs query {query_id} ended with status {status!r}")

# The loop only exits normally on 'Complete', so no re-check is needed.
len_rows = 0
for item in response['results']:
    # Each result row is a list of {'field': ..., 'value': ...} dicts;
    # drop the internal '@ptr' record pointer.
    row = {}
    for event in item:
        if event['field'] != '@ptr':
            row[event['field']] = event['value']
    print(row)
    print()
    len_rows += 1
- Method 2 (limitation: it doesn't accept a parse query string):
# Initialize the CloudWatch Logs client.
client = boto3.client(
service_name='logs',
region_name=aws_region,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key
)

rows = []
# kwargs holder for pagination: empty on the first call,
# {'nextToken': ...} on every subsequent page.
next_token = {}
while True:
    response = client.filter_log_events(
        logGroupName=log_group_name,
        logStreamNames=list_log_stream,
        startTime=start_time,
        endTime=end_time,
        filterPattern=filter_pattern,
        **next_token
    )
    # Keep only events parse_event could handle. Test for None directly
    # instead of the original `str(...) != 'None'` string comparison,
    # which would also drop a legitimately parsed 'None' value.
    for event in response['events']:
        parsed = parse_event(event)
        if parsed is not None:
            rows.append(str(parsed))
    # filter_log_events paginates: no nextToken means this was the last page.
    if 'nextToken' not in response:
        break
    next_token = {'nextToken': response['nextToken']}