Boost Spiri Qi recursive expression parser

42 views Asked by At
    struct NestedStr;

    typedef boost::variant<std::string, boost::recursive_wrapper< NestedStr>> expression;

    struct NestedStr
    {
        vector<expression> children;  // variant instantiated here...

        NestedStr(const vector<expression> & child) : children(child)        
        {
        }

        NestedStr()
        {
        }
    };

    template <typename Iterator>
        struct TimingGrammar : qi::grammar<Iterator, SDFDelayInfo(), ascii::space_type>
    {
        TimingGrammar() : TimingGrammar::base_type(start)
        {

            str_rule            %= +(qi::char_ - (qi::lit('(') | qi::lit(')')));
            brace_rule          %= '(' >> str_rule >> brace_rule >> ')'; 

            start   %= '(' >> qi::lit("TIMING") >> brace_rule ')';
        }

        qi::rule<Iterator, string(), ascii::space_type> str_rule;
        qi::rule<Iterator, NestedStr(), ascii::space_type> start;


    };

I am parsing this nested expression and want to parse till the closing bracket.

(TIMING (RECOVERY (COND(COND(COND(COND))))) (WIDTH (COND AB == 1'b1 (pos CK)) (0.040::0.040)) )

But facing long compiler error.

And second question is there any way whenever "(TIMING" is encountered, omit everything in between the closing bracket of "(TIMING" and move the string iterator after the closing bracket.

1

There are 1 answers

0
sehe On

Because your code couldn't work and wasn't self-contained, I took a stab at it: Live On Coliru

Now obviously, this may or may not be the same problem you ran into.

However, it does highlight a conceptual problem:

/home/sehe/custom/superboost/libs/spirit/include/boost/spirit/home/qi/detail/assign_to.hpp|153 col 20| error: no matching function for call to ‘AST::SubNode::SubNode(const std::__cxx11::basic_string<char>&)’

There is a problem propagating synthesized attributes into the actual attribute type.

Most interestingly, your production for subnode is "(" >> key >> subnode >> ")". That doesn't match any of the AST nodes. Also, since nothing in that rule is optional AND it recurses, it will by definition never parse because unless the input is infinite, at some point it will fail to match.

Going from the example I'd guess that you actually need something more like:

using Value = std::string;
using Nodes = std::vector<struct Node>;

struct Node {
    Value value;
    Nodes children;
};

Changing the grammar to match makes things work:

Live On Coliru

template <typename Iterator>
struct TimingGrammar                                        //
    : qi::grammar<Iterator, SDFDelayInfo(), qi::space_type> //
{
    TimingGrammar() : TimingGrammar::base_type(start) {
        value_ = +~qi::char_(")(");
        node_  = '(' >> value_ >> *node_ >> ')';
        start  = '(' >> qi::string("TIMING") >> *node_ >> ')';

        BOOST_SPIRIT_DEBUG_NODES((value_)(node_)(start))
    }

  private:
    qi::rule<Iterator, AST::Value()>                value_;
    qi::rule<Iterator, AST::Node(), qi::space_type> node_;
    qi::rule<Iterator, AST::Node(), qi::space_type> start;
};

Printing:

Parsed into: ("TIMING")
Parsed into: ("TIMING" ("RECOVERY ") )
Parsed into: ("TIMING" ("RECOVERY " ("COND") ) )
Parsed into: ("TIMING" ("RECOVERY " ("COND" ("COND" ("COND" ("COND") ) ) ) ) )
Parsed into: ("TIMING" ("WIDTH " ("COND AB == 1'b1 " ("pos CK") ) ) )
Parsed into: ("TIMING" ("WIDTH " ("0.040::0.040") ) )
Parsed into: ("TIMING" ("WIDTH " ("COND AB == 1'b1 " ("pos CK") )  ("0.040::0.040") ) )
Parsed into: ("TIMING" ("RECOVERY " ("COND" ("COND" ("COND" ("COND") ) ) ) )  ("WIDTH " ("COND AB == 1'b1 " ("pos CK") )  ("0.040::0.040") ) )

Debugging Parse Fail

You will note that I prepared BOOST_SPIRIT_DEBUG_NODES, enable it with

#define BOOST_SPIRIT_DEBUG

Subtler Notes

Things easily missed in the above:

  • value_ doesn't have a skipper anymore - otherwise it lose the spaces inside longer keys; see also Boost spirit skipper issues
  • *node_ is a Kleene-star repeat, which means it is optional
  • it's important to wrap "TIMING" into a qi::string parser, not qi::lit because you need the attribute corresponding to Node::value

Full Listing For Reference

As on https://coliru.stacked-crooked.com/a/3e7acf29455d9477:

//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;

namespace AST {
    using Value = std::string;
    using Nodes = std::vector<struct Node>;

    struct Node {
        Value value;
        Nodes children;

        // for debug output
        friend std::ostream& operator<<(std::ostream& os, Node const& n) {
            os << "(" << quoted(n.value);
            if (!n.children.empty()) {
                for (auto& child : n.children)
                    os << " " << child << " ";
            }
            return os << ")";
        }
    };

} // namespace AST

BOOST_FUSION_ADAPT_STRUCT(AST::Node, value, children)

using SDFDelayInfo = AST::Node;

template <typename Iterator>
struct TimingGrammar                                        //
    : qi::grammar<Iterator, SDFDelayInfo(), qi::space_type> //
{
    TimingGrammar() : TimingGrammar::base_type(start) {
        value_ = +~qi::char_(")(");
        node_  = '(' >> value_ >> *node_ >> ')';
        start  = '(' >> qi::string("TIMING") >> *node_ >> ')';

        BOOST_SPIRIT_DEBUG_NODES((value_)(node_)(start))
    }

  private:
    qi::rule<Iterator, AST::Value()>                value_;
    qi::rule<Iterator, AST::Node(), qi::space_type> node_;
    qi::rule<Iterator, AST::Node(), qi::space_type> start;
};

int main() {
    using It = std::string::const_iterator;
    for (std::string const input : {
             "(TIMING  )",
             "(TIMING (RECOVERY ))",
             "(TIMING (RECOVERY (COND)))",
             "(TIMING (RECOVERY (COND(COND(COND(COND))))) )",
             "(TIMING (WIDTH (COND AB == 1'b1 (pos CK)) ))",
             "(TIMING (WIDTH (0.040::0.040)))",
             "(TIMING (WIDTH (COND AB == 1'b1 (pos CK)) (0.040::0.040)))",
             // everything combined
             "(TIMING (RECOVERY (COND(COND(COND(COND))))) (WIDTH (COND AB == 1'b1 (pos CK)) (0.040::0.040)))",
         }) {
        SDFDelayInfo info;
        if (phrase_parse(begin(input), end(input), TimingGrammar<It>{}, qi::space, info)) {
            std::cout << "Parsed into: " << info << "\n";
        } else {
            std::cout << "Parsed failed\n";
        }
    }
}