Having problems using boost spirit to identify token string with double brackets

27 views Asked by At

While parsing a string:

1815 ((sd-pam)) S 1778 1778 1778 ...

I am trying to omit the "((sd-pam))" token

I used the following:

omit_string %= lexeme['(' >> +(char_ - ')') >> ')'];

then I would use it in the following way below:

    start %=
        int_
        >> omit[omit_string >> char_ >> repeat(10)[long_]]
        >> long_ >> long_ >> long_ >> long_
     ;

This works with a single bracket at the end (e.g. "((sd-pam)"), but does not work with more than one at the end. I can't seem to find any documentation on the relevant syntax to figure out how I can achieve the desired result.

1

There is 1 answer

4
sehe On

It all depends on your actual grammar.

Your samples are not enough to specify what you want. To parse the sample you would simply adjust the rule:

omit_string = "((" >> *(qi::char_ - "))") >> "))";

To also support (sd-pam):

omit_string =                             //
    "((" >> *(qi::char_ - "))") >> "))" | //
    "(" >> *(qi::char_ - ")") >> ")"      //
    ;

See it Live On Coliru

// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
namespace qi = boost::spirit::qi;

namespace Parsing {
    /// Record produced by the parser for one input line.
    ///
    /// `leader` receives the leading integer of the line; `a`..`d` receive the
    /// four trailing integers that remain after the omitted fields. Every
    /// member is zero-initialized so a default-constructed Data never holds
    /// indeterminate values (e.g. if it is inspected after a failed parse).
    /// Member names and order must stay in sync with the Fusion adaptation.
    struct Data {
        int  leader = 0;
        long a      = 0;
        long b      = 0;
        long c      = 0;
        long d      = 0;
    };
    using boost::fusion::operator<<;
} // namespace Parsing

// Adapt Parsing::Data as a Boost.Fusion sequence with its members in the
// listed order. This is what lets the Qi rules use Data as their attribute
// and lets boost::fusion::operator<< print it.
BOOST_FUSION_ADAPT_STRUCT(Parsing::Data, leader, a, b, c, d)

namespace Parsing {
    /// Qi grammar that parses one input line into a Data record.
    ///
    /// The grammar itself is declared without a skipper; whitespace skipping
    /// is applied internally by `start`, so it can be used with plain
    /// qi::parse.
    ///
    /// @tparam It iterator type of the input being parsed.
    template <typename It> struct DataParser : qi::grammar<It, Data()> {
        DataParser() : DataParser::base_type(start) {
            // Entry point: run `data` with qi::space as the skipper.
            start = qi::skip(qi::space)[data];

            // Parenthesized token that will be discarded. The alternatives are
            // ordered: the double-parenthesis form "((...))" is tried first,
            // then the single-parenthesis form "(...)".
            omit_string =                             //
                "((" >> *(qi::char_ - "))") >> "))" | //
                "(" >> *(qi::char_ - ")") >> ")"      //
                ;

            // Line layout: a leading int, then a run wrapped in qi::omit (the
            // parenthesized token, one character and ten longs) whose
            // attributes are not stored, then the four longs that fill the
            // remaining Data members.
            data = qi::int_                                                        //
                >> qi::omit[omit_string >> qi::char_ >> qi::repeat(10)[qi::long_]] //
                >> qi::long_ >> qi::long_ >> qi::long_ >> qi::long_                //
                ;

            // Names the rules for Spirit's debug tracing; presumably only has
            // an effect when BOOST_SPIRIT_DEBUG is defined before the includes.
            BOOST_SPIRIT_DEBUG_NODES((start)(data)(omit_string));
        }

      private:
        // Skipper-less entry rule; delegates to `data` under qi::space.
        qi::rule<It, Data()>                 start;
        // Whitespace-skipping rule describing the whole line.
        qi::rule<It, Data(), qi::space_type> data;
        // lexemes:
        // (declared without a skipper and without an attribute)
        qi::rule<It> omit_string;
    };
} // namespace Parsing

int main() {
    using It = std::string::const_iterator;
    static Parsing::DataParser<It> const p{};

    for (std::string const input : {
             "1815 ((sd-pam)) S 1778 1778 1778 1778 1778 1778 1778 1778 1778 1778 11 22 33 44",
             "1815 (sd-pam) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
         }) {
        std::cout << "========\nTesting " << quoted(input) << "\n";

        auto f = begin(input), l = end(input);
        if (Parsing::Data d; parse(f, l, p, d)) {
            std::cout << "Parsed: " << d << "\n";
        } else {
            std::cout << "Parsed failed\n";
        }

        if (f != l)
            std::cout << " -- Remaining unparsed input: " << quoted(std::string(f, l)) << "\n";
    }
}
Printing

========

Testing "1815 ((sd-pam)) S 1778 1778 1778 1778 1778 1778 1778 1778 1778 1778 11 22 33 44"

Parsed: (1815 11 22 33 44)

========

Testing "1815 (sd-pam) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44"

Parsed: (1815 -11 -22 -33 -44)

UPDATE

In response to the updated requirements in the comments, I suggested some improved solutions, including:

quoted_string = '(' >> *~qi::char_(')') >> +qi::char_(')');

And finally

quoted_string = '(' >> *~qi::char_(')') >> +(quoted_string | ')');

Which passes all of the edge test cases:

for (std::string const input : {
         "1815 ((sd-pam)) S 1778 1778 1778 1778 1778 1778 1778 1778 1778 1778 11 22 33 44",
         "1815 (sd-pam) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
         // new requirements
         "1815 ((sd-pam))) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
         // now they all work:
         "1815 () S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
         "1815 ((sd-pam)))S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
         "1815 ((sd-pam)))\tS 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
         // and for the right reasons:
         "1815 (()) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
         "1815 ()) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",

         // and even these do with the last version
         "1815 ((sd-pam)(some more)) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
         "1815 ((sd-pam))))(some more)) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
         "1815 ((sd-pam)(some more))))) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
     }) {

See it Live On Coliru

// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
namespace qi = boost::spirit::qi;

namespace Parsing {
    /// Record produced by the parser for one input line.
    ///
    /// `leader` receives the leading integer of the line; `a`..`d` receive the
    /// four trailing integers that remain after the omitted fields. Every
    /// member is zero-initialized so a default-constructed Data never holds
    /// indeterminate values (e.g. if it is inspected after a failed parse).
    /// Member names and order must stay in sync with the Fusion adaptation.
    struct Data {
        int  leader = 0;
        long a      = 0;
        long b      = 0;
        long c      = 0;
        long d      = 0;
    };
    using boost::fusion::operator<<;
} // namespace Parsing

// Adapt Parsing::Data as a Boost.Fusion sequence with its members in the
// listed order. This is what lets the Qi rules use Data as their attribute
// and lets boost::fusion::operator<< print it.
BOOST_FUSION_ADAPT_STRUCT(Parsing::Data, leader, a, b, c, d)

namespace Parsing {
    /// Qi grammar that parses one input line into a Data record.
    ///
    /// The grammar itself is declared without a skipper; whitespace skipping
    /// is applied internally by `start`, so it can be used with plain
    /// qi::parse.
    ///
    /// @tparam It iterator type of the input being parsed.
    template <typename It> struct DataParser : qi::grammar<It, Data()> {
        DataParser() : DataParser::base_type(start) {
            // Entry point: run `data` with qi::space as the skipper.
            start = qi::skip(qi::space)[data];

            // perhaps:
            // An opening '(' followed by any run of characters other than ')',
            // then one or more of: a nested quoted_string, or a literal ')'.
            // The rule refers to itself, so parenthesized groups may follow
            // one another and surplus closing parentheses are absorbed.
            quoted_string = '(' >> *~qi::char_(')') >> +(quoted_string | ')');

            // Line layout: a leading int, then a run wrapped in qi::omit (the
            // parenthesized token, one character and ten longs) whose
            // attributes are not stored, then the four longs that fill the
            // remaining Data members.
            data = qi::int_                                                          //
                >> qi::omit[quoted_string >> qi::char_ >> qi::repeat(10)[qi::long_]] //
                >> qi::long_ >> qi::long_ >> qi::long_ >> qi::long_                  //
                ;

            // Names the rules for Spirit's debug tracing; presumably only has
            // an effect when BOOST_SPIRIT_DEBUG is defined before the includes.
            BOOST_SPIRIT_DEBUG_NODES((start)(data)(quoted_string));
        }

      private:
        // Skipper-less entry rule; delegates to `data` under qi::space.
        qi::rule<It, Data()>                 start;
        // Whitespace-skipping rule describing the whole line.
        qi::rule<It, Data(), qi::space_type> data;
        // lexemes:
        // (declared without a skipper and without an attribute)
        qi::rule<It> quoted_string;
    };
} // namespace Parsing

int main() {
    using It = std::string::const_iterator;
    static Parsing::DataParser<It> const p{};

    for (std::string const input : {
             "1815 ((sd-pam)) S 1778 1778 1778 1778 1778 1778 1778 1778 1778 1778 11 22 33 44",
             "1815 (sd-pam) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
             // new requirements
             "1815 ((sd-pam))) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
             // now they all work:
             "1815 () S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
             "1815 ((sd-pam)))S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
             "1815 ((sd-pam)))\tS 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
             // and for the right reasons:
             "1815 (()) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
             "1815 ()) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",

             // and even these do with the last version
             "1815 ((sd-pam)(some more)) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
             "1815 ((sd-pam))))(some more)) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
             "1815 ((sd-pam)(some more))))) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44",
         }) {
        std::cout << "========\nTesting " << quoted(input) << "\n";

        auto f = begin(input), l = end(input);
        if (Parsing::Data d; parse(f, l, p, d)) {
            std::cout << "Parsed: " << d << "\n";
        } else {
            std::cout << "Parsed failed\n";
        }

        if (f != l)
            std::cout << " -- Remaining unparsed input: " << quoted(std::string(f, l)) << "\n";
    }
}

Printing

========

Testing "1815 ((sd-pam)) S 1778 1778 1778 1778 1778 1778 1778 1778 1778 1778 11 22 33 44"

Parsed: (1815 11 22 33 44)

========

Testing "1815 (sd-pam) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44"

Parsed: (1815 -11 -22 -33 -44)

========

Testing "1815 ((sd-pam))) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44"

Parsed: (1815 -11 -22 -33 -44)

========

Testing "1815 () S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44"

Parsed: (1815 -11 -22 -33 -44)

========

Testing "1815 ((sd-pam)))S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44"

Parsed: (1815 -11 -22 -33 -44)

========

Testing "1815 ((sd-pam)))   S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44"

Parsed: (1815 -11 -22 -33 -44)

========

Testing "1815 (()) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44"

Parsed: (1815 -11 -22 -33 -44)

========

Testing "1815 ()) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44"

Parsed: (1815 -11 -22 -33 -44)

========

Testing "1815 ((sd-pam)(some more)) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44"

Parsed: (1815 -11 -22 -33 -44)

========

Testing "1815 ((sd-pam))))(some more)) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44"

Parsed: (1815 -11 -22 -33 -44)

========

Testing "1815 ((sd-pam)(some more))))) S 0 0 0 0 0 0 0 0 0 0 -11 -22 -33 -44"

Parsed: (1815 -11 -22 -33 -44)