C++ boost::qi parse space and newline delimited numbers as a 2D vector

108 views Asked by At

I have multiple lines of floating point numbers; the numbers within a line are space separated, e.g.

1.2 2.2 3.2
1.1 2.1 3.1

I want to extract the above numbers as strings and parse them into a 2D vector, i.e. a std::vector< std::vector< std::string > > holding { {"1.2", "2.2", "3.2"}, {"1.1", "2.1", "3.1"} }

My code is like below.

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <string>

namespace client
{
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;

    // Intended result type: an outer vector with one entry per input line,
    // each entry being a vector of the numbers on that line kept as strings.
    using VecType = std::vector< std::vector< std::string >>; 
    
    // Struct with a single VecType member named `name`.
    struct employee
    {
        VecType name;
    };
}

// Adapts client::employee as a Boost.Fusion sequence exposing its `name` member.
// Note: the employee_parser grammar below declares VecType (not employee) as
// its attribute, so this adaptation is not what the grammar parses into.
BOOST_FUSION_ADAPT_STRUCT(
    client::employee,
    (client::VecType, name)
)
//]

namespace client
{
    // Grammar from the question: attribute is VecType (vector of vector of
    // string) and the skipper is ascii::space_type.
    //
    // NOTE: nothing in this grammar treats a line break differently from any
    // other whitespace, so it has no way to start a new inner vector per line.
    // As described in the question text, the observed result is one inner
    // vector per number instead of one per line.
    template <typename Iterator>
    struct employee_parser : qi::grammar<Iterator, VecType(), ascii::space_type>
    {
        employee_parser() : employee_parser::base_type(start)
        {
            using qi::lexeme;
            using ascii::char_;

            // Inside lexeme[]: one or more characters from the set "0-9."
            // followed by a single qi::space character.
            number %= lexeme[+char_( "0-9." ) >>  qi::space ];
            // One or more `number`s; there is no separate rule for a row/line.
            start %= +number;
        }

        // Synthesizes a std::string per match.
        qi::rule<Iterator, std::string(), ascii::space_type> number;
        // Start rule; synthesizes the whole VecType.
        qi::rule<Iterator, VecType(), ascii::space_type> start;
    };
}

But this produces a 2D vector whose outer vector has size 6 and whose inner vectors each have size 1.

I don't understand how to split the input only at the newlines, so that I get just 2 inner vectors.

1

There is 1 answer

3
sehe On BEST ANSWER

You have to split up the rules. Let's start with the types:

using VecType = std::vector<std::string>;
using VecVecType = std::vector<VecType>;

Now, let's make a rule to parse a number, a row of numbers and multiple rows:

qi::rule<Iterator, std::string()> number;
qi::rule<Iterator, VecType(), qi::blank_type> row;
qi::rule<Iterator, VecVecType()> start;

Implementing them (note that I moved the skipper into the grammar because it's not a good idea to leak that into the interface):

number = raw [ double_ ]; // raw[] to get string value
row    = *number;         // zero or more numbers, so an empty line (or empty input) still parses, as in the demo below
start  = qi::skip(blank) [ row % eol ]; // rows are separated by eol; only blanks are skipped

Note: I used blank instead of space because we do NOT want to skip the eol which is significant for the grammar.

DEMO

Live On Coliru

#include <boost/spirit/include/qi.hpp>
#include <string>
#include <iomanip>

namespace client {
    namespace qi = boost::spirit::qi;

    // One input line: each number kept as the text it was written with.
    using VecType = std::vector<std::string>;
    // The whole input: one VecType per line.
    using VecVecType = std::vector<VecType>;

    // Grammar for end-of-line separated rows of blank-separated numbers.
    // The grammar itself is declared without a skipper; the blank skipper is
    // applied internally by the `start` rule.
    template <typename Iterator>
    struct my_parser : qi::grammar<Iterator, VecVecType()> {
        my_parser() : my_parser::base_type(start) {
            // raw[] yields the matched source text rather than the double value.
            number = qi::raw[qi::double_];

            // Zero or more numbers make up a row.
            row = *number;

            // Rows are separated by eol. The skipper is `blank`, not `space`,
            // because eol is significant here and must not be skipped.
            start = qi::skip(qi::blank)[row % qi::eol];
        }

        qi::rule<Iterator, std::string()> number;
        qi::rule<Iterator, VecType(), qi::blank_type> row;
        qi::rule<Iterator, VecVecType()> start;
    };
} // namespace client

int main() {
    client::my_parser<std::string::const_iterator> const p;
    for (std::string const& input: {
            "",
            "1.2 2.2 3.2\n1.1 2.1 3.1",
            })
    {
        std::cout << "--- " << std::quoted(input) << " -----\n";
        auto f = begin(input), l = end(input);
        client::VecVecType output;
        if (parse(f, l, p, output)) {
            std::cout << "Parsed:\n";
            for (auto& row : output) {
                for (auto& v : row) {
                    std::cout << "\t" << v;
                }
                std::cout << "\n";
            }
        } else {
            std::cout << "Failed\n";
        }
        if (f!=l) {
            std::cout << "Remaining input: " << std::quoted(std::string(f,l)) << "\n";
        }
    }
}

Prints

--- "" -----
Parsed:

--- "1.2 2.2 3.2
1.1 2.1 3.1" -----
Parsed:
    1.2 2.2 3.2
    1.1 2.1 3.1

BONUS

Strong typing makes everything more fun: why parse into strings if you can parse into doubles?

Also showing how to enable rule debugging:

Live On Coliru

#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <string>
#include <iomanip>

namespace client {
    namespace qi = boost::spirit::qi;

    // One input line, parsed straight into doubles.
    using VecType = std::vector<double>;
    // The whole input: one VecType per line.
    using VecVecType = std::vector<VecType>;

    // Grammar for end-of-line separated rows of blank-separated doubles.
    // The grammar is declared without a skipper; the blank skipper is applied
    // internally by the `start` rule.
    template <typename Iterator>
    struct my_parser : qi::grammar<Iterator, VecVecType()> {
        my_parser() : my_parser::base_type(start) {
            // Zero or more doubles make up a row.
            row = *qi::double_;

            // Rows are separated by eol; only blanks are skipped so that eol
            // stays visible to the grammar.
            start = qi::skip(qi::blank)[row % qi::eol];

            // Registers both rules for the debug trace (BOOST_SPIRIT_DEBUG).
            BOOST_SPIRIT_DEBUG_NODES((start)(row))
        }

      private:
        qi::rule<Iterator, VecType(), qi::blank_type> row;
        qi::rule<Iterator, VecVecType()> start;
    };
} // namespace client

int main() {
    client::my_parser<std::string::const_iterator> const p;
    for (std::string const& input: {
            "",
            "1.2 2.2 3.2\n1.1 2.1 3.1",
            })
    {
        std::cout << "--- " << std::quoted(input) << " -----\n";
        auto f = begin(input), l = end(input);
        client::VecVecType output;
        if (parse(f, l, p, output)) {
            std::cout << "Parsed:\n";
            for (auto& row : output) {
                for (auto& v : row) {
                    std::cout << "\t" << v;
                }
                std::cout << "\n";
            }
        } else {
            std::cout << "Failed\n";
        }
        if (f!=l) {
            std::cout << "Remaining input: " << std::quoted(std::string(f,l)) << "\n";
        }
    }
}

Prints

--- "" -----
<start>
  <try></try>
  <row>
    <try></try>
    <success></success>
    <attributes>[[]]</attributes>
  </row>
  <success></success>
  <attributes>[[[]]]</attributes>
</start>
Parsed:

--- "1.2 2.2 3.2
1.1 2.1 3.1" -----
<start>
  <try>1.2 2.2 3.2\n1.1 2.1 </try>
  <row>
    <try>1.2 2.2 3.2\n1.1 2.1 </try>
    <success>\n1.1 2.1 3.1</success>
    <attributes>[[1.2, 2.2, 3.2]]</attributes>
  </row>
  <row>
    <try>1.1 2.1 3.1</try>
    <success></success>
    <attributes>[[1.1, 2.1, 3.1]]</attributes>
  </row>
  <success></success>
  <attributes>[[[1.2, 2.2, 3.2], [1.1, 2.1, 3.1]]]</attributes>
</start>
Parsed:
    1.2 2.2 3.2
    1.1 2.1 3.1