state is "downloading", but torrent_file() returns NULL?

1.5k views Asked by At

I recently tried to write a scraper capable of downloading 3.5 million torrent files based on their magnet URL. I decided to start by hacking an example from libtorrent's tutorial webpage, but while it works well with just one torrent file, it fails segfaults in create_torrent() when I try to download more than one file. Here's my code:

#include <thread>
#include <chrono>
#include <fstream>
#include <sstream>
#include <string>

#include <libtorrent/session.hpp>
#include <libtorrent/add_torrent_params.hpp>
#include <libtorrent/create_torrent.hpp>
#include <libtorrent/torrent_handle.hpp>
#include <libtorrent/alert_types.hpp>
#include <libtorrent/bencode.hpp>
#include <libtorrent/torrent_status.hpp>
#include <libtorrent/torrent_info.hpp>

namespace lt = libtorrent;
using clk = std::chrono::steady_clock;

int torrents_left = 0;

int save_file(std::string const& filename, std::vector<char>& v)
{
  FILE* f = std::fopen(filename.c_str(), "wb");
  if (f == nullptr)
    return -1;

  int w = int(std::fwrite(&v[0], 1, v.size(), f));
  std::fclose(f);

  if (w < 0) return -1;
  if (w != int(v.size())) return -3;
  return 0;
}

void add_torrent_url(std::string url, lt::session& ses) {
//  std::cerr << "DEBUG: Will download '" << url << "'" << std::endl;
  lt::add_torrent_params atp;
  atp.url = url;
  atp.save_path = "."; // save in current dir
  ses.async_add_torrent(atp);
  torrents_left++;
}

void add_torrents_from_stdin(lt::session& ses) {
  std::cerr << "DEBUG: reading stdin." << std::endl;
  std::string url;
  while(std::getline(std::cin, url)) {
    add_torrent_url(url, ses);
  }
  std::cerr << "DEBUG: done reading stdin." << std::endl;
}

int main(int argc, char const* argv[])
{
  lt::settings_pack pack;
  pack.set_int(lt::settings_pack::alert_mask
      , lt::alert::error_notification
      | lt::alert::storage_notification
      | lt::alert::status_notification);

  lt::session ses(pack);

  lt::add_torrent_params atp;

  //add_torrent_url(argv[1]);
  add_torrent_url("magnet:?xt=urn:btih:3E37CFE29B1049E03F858758A73EFD85BA170BE8", ses);
  add_torrent_url("magnet:?xt=urn:btih:8FCDE178E3F9A24EA40856826C4E8A625A931B73", ses);
  //add_torrents_from_stdin(ses);

  // this is the handle we'll set once we get the notification of it being
  // added
  lt::torrent_handle h;
  for (;;) {
    std::vector<lt::alert*> alerts;
    ses.pop_alerts(&alerts);

    for (lt::alert const* a : alerts) {
      if (auto at = lt::alert_cast<lt::add_torrent_alert>(a)) {
        h = at->handle;
      }

      // if we receive the finished alert or an error, we're done
      if (lt::alert_cast<lt::torrent_finished_alert>(a)) {
        std::cout << "torrent finished or error." << std::endl;
        goto done;
      }

      if (lt::alert_cast<lt::torrent_error_alert>(a)) {
        std::cout << a->message() << std::endl;
        goto done;
      }

      if (auto st = lt::alert_cast<lt::state_update_alert>(a)) {
        if (st->status.empty()) continue;

        // we only have a single torrent, so we know which one
        // the status is for
        lt::torrent_status const& s = st->status[0];
        if (s.state == lt::torrent_status::downloading)
        {
          std::cout << "Hi!" << std::endl;
          std::shared_ptr<const lt::torrent_info> ti = h.torrent_file();
          if (ti == 0) {
            std::cerr << "ERROR: ti == NULL" << std::endl;
            goto done;
          }
          ses.remove_torrent(h, lt::session::delete_files);
          lt::create_torrent new_torrent(*ti);
          std::vector<char> out;
          lt::bencode(std::back_inserter(out), new_torrent.generate());

          std::stringstream ss;
          ss << "downloaded/" << (*ti).info_hash() << ".torrent";
          save_file(ss.str(), out);
          h.pause();
          torrents_left--;
          std::cerr << "DEBUG: Done (" << torrents_left << " left): " << (*ti).info_hash() << std::endl;
          if (torrents_left == 0)
            goto done;
        }
      }
    }
    std::this_thread::sleep_for(std::chrono::milliseconds(200));

    // ask the session to post a state_update_alert, to update our
    // state output for the torrent
    ses.post_torrent_updates();

  }
  done:
  {}
}

I suspect it's related to this part:

    // we only have a single torrent, so we know which one
    // the status is for
    lt::torrent_status const& s = st->status[0];

But according to my debugger, when torrent_file() gives NULL, st->status[] only contains one element anyway.

What's happening here? How do I fix it?

1

There are 1 answers

0
d33tah On

It looks like I made wrong assumptions about what "h" points to in the example. Here's a diff that fixes the code in question:

--- scrape_rasterbar.cpp        2017-01-07 21:00:39.565636805 +0100
+++ scrape_rasterbar_old.cpp    2017-01-07 21:05:53.339718098 +0100
@@ -1,4 +1,3 @@
-#include <iostream>
 #include <thread>
 #include <chrono>
 #include <fstream>
@@ -94,17 +93,18 @@
       if (auto st = lt::alert_cast<lt::state_update_alert>(a)) {
         if (st->status.empty()) continue;

-        for (auto &s : st->status) {
           // we only have a single torrent, so we know which one
           // the status is for
+        lt::torrent_status const& s = st->status[0];
           if (s.state == lt::torrent_status::downloading)
           {
-            std::shared_ptr<const lt::torrent_info> ti = s.handle.torrent_file();
+          std::cout << "Hi!" << std::endl;
+          std::shared_ptr<const lt::torrent_info> ti = h.torrent_file();
             if (ti == 0) {
               std::cerr << "ERROR: ti == NULL" << std::endl;
               goto done;
             }
-            ses.remove_torrent(s.handle, lt::session::delete_files);
+          ses.remove_torrent(h, lt::session::delete_files);
             lt::create_torrent new_torrent(*ti);
             std::vector<char> out;
             lt::bencode(std::back_inserter(out), new_torrent.generate());
@@ -112,7 +112,7 @@
             std::stringstream ss;
             ss << "downloaded/" << (*ti).info_hash() << ".torrent";
             save_file(ss.str(), out);
-            s.handle.pause();
+          h.pause();
             torrents_left--;
             std::cerr << "DEBUG: Done (" << torrents_left << " left): " << (*ti).info_hash() << std::endl;
             if (torrents_left == 0)
@@ -120,7 +120,6 @@
           }
         }
       }
-    }
     std::this_thread::sleep_for(std::chrono::milliseconds(200));

     // ask the session to post a state_update_alert, to update our