def start_requests(self):
    """Yield the initial feed requests for this spider.

    Every request carries its starting URL in ``meta['original_url']`` so
    that ``parse`` can compare it with ``response.url`` afterwards.

    HTTP 403 is explicitly allowed through for these requests. By default
    Scrapy's ``HttpErrorMiddleware`` discards non-2xx responses ("HTTP
    status code is not handled or not allowed"), so ``parse`` -- which
    checks ``response.status`` itself -- would never be called for them.

    Yields:
        scrapy.Request: one request per feed URL, sent with
        ``self.headers`` and routed to ``self.parse``.
    """
    urls = [
        'https://www.irrawaddy.com/category/business/feed',
    ]
    for url in urls:
        yield scrapy.Request(
            url,
            headers=self.headers,
            callback=self.parse,
            meta={
                'original_url': url,
                # Only 403 is whitelisted, so 3xx responses are still
                # followed by the redirect middleware exactly as before.
                'handle_httpstatus_list': [403],
            },
        )
def parse(self, response):
try:
original_url = response.meta.get('original_url')
final_url = response.url
print(f"{response.status}")
if response.status == 200:
print(f"Original URL: {original_url}, Final URL: {final_url}")
2024-03-27 14:31:29 [scrapy.core.engine] INFO: Spider opened
2024-03-27 14:31:29 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
2024-03-27 14:31:29 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6030
2024-03-27 14:31:30 [scrapy.core.engine] DEBUG: Crawled (403) <GET https://www.irrawaddy.com/category/business/feed> (referer: None)
2024-03-27 14:31:30 [scrapy.spidermiddlewares.httperror] INFO: Ignoring response <403 https://www.irrawaddy.com/category/business/feed>: HTTP status code is not handled or not allowed
2024-03-27 14:31:30 [scrapy.core.engine] INFO: Closing spider (finished)