I have been trying this for a long time now. I want to scrape content from a subreddit that has adult content. The problem is that you have to answer a simple question before you are given access to the page, i.e. whether you are 18+ or not. I did some research on the page source and found that the solution is a simple POST request in which you need to send the parameter "over18=yes". But my problem is that I am not able to access the response body after the POST.
Here's the code using the http module in Node. I have also tried it with the Node "request" module, but that did not help either.
Hoping to find someone who can help me out here.
// Confirms the reddit "over 18" prompt by POSTing the form field over18=yes,
// then logs the status, headers and body chunks of the response.
var http = require("http");

// The form field submitted by the confirmation page, url-encoded for the request body.
var formBody = 'over18=yes';

var options = {
  host: 'www.reddit.com',
  port: 80,
  // dest names the page to be reached once the confirmation is accepted.
  path: '/over18?dest=http%3A%2F%2Fwww.reddit.com%2Fr%2Fnsfw',
  method: 'POST',
  headers: {
    // Declare the body as form data so the server parses over18=yes as a form field.
    'Content-Type': 'application/x-www-form-urlencoded',
    'Content-Length': Buffer.byteLength(formBody)
  }
};

var req = http.request(options, function(res) {
  console.log('STATUS: ' + res.statusCode);
  console.log('HEADERS: ' + JSON.stringify(res.headers));
  res.setEncoding('utf8');
  res.on('data', function (chunk) {
    console.log('BODY: ' + chunk);
  });
});

req.on('error', function(e) {
  console.log('problem with request: ' + e.message);
});

// Write the form data to the request body
req.write(formBody);
req.end();
And here is the code using the Node request module:
// POSTs the over18=yes form field with the "request" module and logs the response body.
var request = require("request");
request.post({url:'http://www.reddit.com/over18?dest=http%3A%2F%2Fwww.reddit.com%2Fr%2Fnsfw', form: {over18:'yes'}}, function(err,httpResponse,body){
  // Report a failed request instead of silently logging an undefined body.
  if (err) {
    console.log('problem with request: ' + err.message);
    return;
  }
  console.log(body);
});
The URL I am trying to access is http://www.reddit.com/r/nsfw
In short, when you click the YES button, the form sends the over18=yes parameter to the URL http://www.reddit.com/over18?dest=http%3A%2F%2Fwww.reddit.com%2Fr%2Fnsfw using the POST method. The server then responds with a 302 redirect header and a cookie with the value over18=1, and finally redirects to the URL http://www.reddit.com/r/nsfw using a GET request. After that, the server just checks whether you have a cookie with the required value.
All you need to do is make a GET request directly to the final URL with that cookie set (for example, by sending the request header "Cookie: over18=1").