Problem downloading files with boost asio, junk in output file

72 views Asked by At

I'm trying to download any files from server - images, text/html, zip etc. But somehow I have some trash in the beginning and at the end of my output file.

#include "dependencies.h"

asio::streambuf response_;
std::istream is(&response_);
std::ofstream file("file.html", std::ios::out);

void readHeader(asio::ssl::stream<tcp::socket>& socket);
void asyncRead(asio::ssl::stream<tcp::socket>& socket);

void readStatus(asio::ssl::stream<tcp::socket>& socket) {

    asio::async_read_until(socket, response_, "\r\n", [&](boost::system::error_code ec, size_t length) {
        if (!ec) {
            std::string header;
            is >> header;
            std::cout << "Protocol: " << header << '\n';
            std::string status_code;
            std::string status;
            is >> status_code;
            std::cout << "Status: " << status_code << '\n';
            std::string status_message;
            std::getline(is, status_message);

            readHeader(socket);
        }
        else if (ec != asio::error::eof) {
            std::cout << "[Status code] Error: " << ec.message() << '\n';
        }
    });
}

void readHeader(asio::ssl::stream<tcp::socket>& socket) {

    asio::async_read_until(socket, response_, "\r\n\r\n", [&](boost::system::error_code ec, size_t length) {
        if (!ec) {
            std::string status_message;
            while (getline(is, status_message) && status_message != "\r")
                std::cout << status_message << '\n';

            asyncRead(socket);
        }
        else if (ec != asio::error::eof) {
            std::cout << "[Header message] Error: " << ec.message() << '\n';
        }
        });
}

void asyncRead(asio::ssl::stream<tcp::socket>& socket) {
    asio::async_read(socket, response_, asio::transfer_at_least(1), [&](const boost::system::error_code ec, size_t length) {
        if (!ec) {
            file << &response_;

            asyncRead(socket);
        }
        else if (ec != asio::error::eof) {
            std::cout << "[Async reading] Error: " << ec.what() << std::endl;
        }
    });
}

int main(int args, const char* argv[]) {

    setlocale(LC_ALL, "Rus");

    boost::system::error_code ec;
    asio::io_context context;
    asio::ssl::context sslContext(asio::ssl::context::method::sslv23_client);
    sslContext.set_default_verify_paths();
    asio::ssl::stream<tcp::socket> socket(context, sslContext);

    //https://codeload.github.com/ERHZAN/NRM-Launcher/zip/refs/heads/main
    boost::urls::url url = boost::urls::url_view("https://en.cppreference.com/w/");
    std::string path = url.path();
    std::string host = url.host();
    std::string scheme = url.scheme();

    asio::streambuf request_;
    std::ostream os(&request_);
    os << "GET " << path << " HTTP/1.1" << "\r\n";
    os << "Host: " << host << "\r\n";
    os << "Accept: */*\r\n";
    os << "Connection: close\r\n\r\n";

    tcp::resolver resolver(context);
    tcp::resolver::query query(host, "https");
    resolver.async_resolve(query, [&](const boost::system::error_code& ec, tcp::resolver::iterator ep_iterator) {
        if (!ec) {
            socket.set_verify_mode(boost::asio::ssl::verify_peer);
            socket.set_verify_callback([&](bool preverified, boost::asio::ssl::verify_context& ctx) {
                char subject_name[256];
                X509* cert = X509_STORE_CTX_get_current_cert(ctx.native_handle());
                X509_NAME_oneline(X509_get_subject_name(cert), subject_name, 256);
                std::cout << "Verifying " << subject_name << "\n";

                return true;
            });

            socket.lowest_layer().async_connect(ep_iterator->endpoint(), [&](const boost::system::error_code& ec) {
                if (!ec) {
                    socket.async_handshake(asio::ssl::stream_base::handshake_type::client, [&](const boost::system::error_code& ec) {
                        if (!ec) {
                            asio::async_write(socket, request_, [&](const boost::system::error_code ec, size_t length) {
                                if (!ec) {
                                    readStatus(socket);
                                }
                                else {
                                    std::cout << ec.what() << std::endl;
                                }
                            });
                        }
                        else {
                            std::cout << ec.what() << std::endl;
                        }
                    });
                }
                else {
                    std::cout << "[Connection] Error: " << ec.what() << std::endl;
                }
                });
        }
    });
    context.run();

    return 0;
}

And my output file is:

ab5b

<!DOCTYPE html>
<html lang="en" dir="ltr" class="client-nojs">
i've deleted all html here for your convenience
</html>


0




1

There are 1 answers

0
Nick Matteo On

It's chunk encoding.

Each chunk is preceded by its size in bytes. The transmission ends when a zero-length chunk is received. The chunked keyword in the Transfer-Encoding header is used to indicate chunked transfer.

[...]

Each chunk starts with the number of octets of the data it embeds expressed as a hexadecimal number in ASCII followed by optional parameters (chunk extension) and a terminating ␍␊ sequence, followed by the chunk data. The chunk is terminated by ␍␊.

Perhaps try a library which handles HTTP, such as Boost Beast, which will take care of that for you. Or try the solutions at How to tell the HTTP server to not send chunked encoding: Namely, specify HTTP/1.0.