I am currently learning to build distributed software with Akka, and the program I am trying to make is a scraping program. The main challenge I have is that I need control over how many simultaneous requests go to the same host, so that I will not get banned from the sites. As I have never used Akka to any real extent before, I could really use some help with my design: is it good, or have I completely missed the point?
My design idea is to make a router for each host, with as many routees as the number of requests I want to continuously hit the site with, plus a coordinating actor that sends each request to the right router.
As my coordinator is a normal actor, it can only route one request at a time. Is there a way to make it route multiple requests at a time, like the router does, so that it will not become a bottleneck?
My idea in code:
/// <summary>
/// Forwards every incoming HttpRequestMessage to a child router dedicated to the
/// request's host, creating that router the first time the host is seen.
/// </summary>
public class HttpRequestCordinator : ReceiveActor
{
    // One child router per host, keyed by the invariant lower-cased host name.
    private readonly Dictionary<string, IActorRef> hostDownloader;

    /// <summary>
    /// Sets up the empty host table and registers the handler for HttpRequestMessage.
    /// </summary>
    public HttpRequestCordinator()
    {
        hostDownloader = new Dictionary<string, IActorRef>();
        this.Receive<HttpRequestMessage>(r =>
        {
            this.OnHttpRequesetMessage(r);
        });
    }

    /// <summary>
    /// Looks up (or lazily creates) the router for the message's host and forwards the
    /// message to it, keeping the original sender so the reply bypasses this actor.
    /// </summary>
    /// <param name="message">The request to dispatch; its Address.Host selects the router.</param>
    private void OnHttpRequesetMessage(HttpRequestMessage message)
    {
        // Host names are machine identifiers, so normalise them culture-independently.
        var host = message.Address.Host.ToLowerInvariant();

        // Single lookup instead of ContainsKey followed by the indexer.
        IActorRef child;
        if (!hostDownloader.TryGetValue(host, out child))
        {
            // Round-robin pool of HttpRequestActor routees, configured with DefaultResizer(0, 10).
            child = Context.ActorOf(Props.Create(() => new HttpRequestActor()).WithRouter(new RoundRobinPool(1, new DefaultResizer(0, 10))));
            hostDownloader.Add(host, child);
        }

        child.Tell(message, Sender);
    }
}
/// <summary>
/// Performs one HTTP request per received Messages.HttpRequestMessage and replies to the
/// sender with the HttpResponseMessage on success or an HttpRequsetFailed on any error.
/// </summary>
public class HttpRequestActor : ReceiveActor
{
    // One client per actor instance: creating and disposing an HttpClient for every
    // request can exhaust sockets under load. Disposed in PostStop.
    private readonly HttpClient client;

    /// <summary>
    /// Creates the actor's HttpClient and registers the asynchronous request handler.
    /// </summary>
    public HttpRequestActor()
    {
        client = new HttpClient(this.GetHandler());
        AddDefaultHeadersToClient(client);

        // ReceiveAsync makes the actor wait for the request to finish before it takes the
        // next message, so the number of routees really bounds the concurrent requests.
        ReceiveAsync<Messages.HttpRequestMessage>(async r =>
        {
            // Capture the sender before the first await; it must not be read from the
            // actor context once the continuation may be running elsewhere.
            var replyTo = Sender;
            try
            {
                using (var cancellationToken = new CancellationTokenSource())
                {
                    // Per-request timeout, enforced through cancellation.
                    cancellationToken.CancelAfter(r.TimeOut);
                    var result = await client.SendAsync(r.Message, cancellationToken.Token);
                    replyTo.Tell(result);
                }
            }
            catch (Exception)
            {
                // Any failure (including the timeout cancellation) is reported to the
                // requester instead of crashing the actor.
                replyTo.Tell(new HttpRequsetFailed(r));
            }
        });
    }

    /// <summary>
    /// Releases the HttpClient (and the handler it owns) when the actor stops.
    /// </summary>
    protected override void PostStop()
    {
        client.Dispose();
        base.PostStop();
    }

    /// <summary>
    /// Builds the message handler: cookies disabled, gzip/deflate responses decompressed automatically.
    /// </summary>
    /// <returns>A new, caller-owned HttpClientHandler.</returns>
    private HttpClientHandler GetHandler()
    {
        return new HttpClientHandler()
        {
            UseCookies = false,
            AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate
        };
    }

    /// <summary>
    /// Adds the default request headers that are sent with every request.
    /// </summary>
    /// <param name="client">The client whose DefaultRequestHeaders are populated.</param>
    private void AddDefaultHeadersToClient(HttpClient client)
    {
        client.DefaultRequestHeaders.Add("Accept", "*/*");
        client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate");
        client.DefaultRequestHeaders.Add("Accept-Language", "da-DK,da;q=0.8,en-US;q=0.6,en;q=0.4");
        client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.115 Safari/537.36");
        // The standard header name is hyphenated; "AcceptCharset" would be sent as an unknown custom header.
        client.DefaultRequestHeaders.Add("Accept-Charset", "utf-8");
    }
}
You should add a throttling layer between the coordinator and the download router: http://doc.akka.io/docs/akka/snapshot/contrib/throttle.html
However, I'm not sure whether this is implemented in Akka.NET out of the box.