SQL WHERE clause parser with Boost.Spirit Qi

109 views Asked by At

I want to implement some rules of an SQL WHERE clause parser with Boost.Spirit Qi. I found its BNF at https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#_8_4_in_predicate. I implemented rules for specific predicates with some special operands, but when I try to combine them with the OR/AND/NOT logical operators, it does not work. The test cases and the grammar are in the source code below.

#include <cstdio>
#include <fstream>
#include <functional>
#include <iostream>
#include <string>
#include <tuple>
#include <variant>
#include <vector>

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_grammar.hpp>
#include <boost/spirit/repository/include/qi_distinct.hpp>

// Expressions fed to the WHERE-clause parser. Each entry pairs the expected
// outcome with the expression text; every entry here is expected to be
// accepted. Entries are unique so that no expression is exercised twice.
const std::vector<std::tuple<bool /* parsing result */, std::string /* expr */>> testcases_{
  // [NOT] BETWEEN, numeric bounds, every open/closed bracket combination
  {true, "(a between (12,13))"},
  {true, "(a between [12,13))"},
  {true, "(a between [12,13])"},
  {true, "(a between (12,13])"},
  {true, "(a not between (12,13))"},
  {true, "(a not between [12,13))"},
  {true, "(a not between [12,13])"},
  {true, "(a not between (12,13])"},

  // [NOT] BETWEEN, mixed string/numeric bounds
  {true, "(a between ('12',13))"},
  {true, "(a between ['12',13))"},
  {true, "(a between ['12',13])"},
  {true, "(a between ('12',13])"},
  {true, "(a not between ('12',13))"},
  {true, "(a not between ['12',13))"},
  {true, "(a not between ['12',13])"},
  {true, "(a not between ('12',13])"},

  // [NOT] BETWEEN, string bounds
  {true, "(a between ('12','13'))"},
  {true, "(a between ['12','13'))"},
  {true, "(a between ['12','13'])"},
  {true, "(a between ('12','13'])"},
  {true, "(a not between ('12','13'))"},
  {true, "(a not between ['12','13'))"},
  {true, "(a not between ['12','13'])"},
  {true, "(a not between ('12','13'])"},

  // [NOT] IN
  {true, "(a in (12,13,3433))"},
  {true, "a in (12,13,3433)"},
  {true, "a in ('12','13','3433')"},
  {true, "(a in ('12','13','3433'))"},
  {true, "(a NOT in (12,13,3433))"},
  {true, "a not in (12,13,3433)"},
  {true, "a NOT in ('12','13','3433')"},
  {true, "(a not in ('12','13','3433'))"},

  // greater than
  {true, "a gt 'b'"},
  {true, "a > 'b'"},
  {true, "(a gt 'b')"},
  {true, "(a > 'b')"},

  // greater than or equal
  {true, "a >= 'b'"},
  {true, "a => 'b'"},
  {true, "a gte 'b'"},
  {true, "(a gte 'b')"},
  {true, "(a >= 'b')"},
  {true, "(a => 'b')"},

  // less than
  {true, "a lt 'b'"},
  {true, "a < 'b'"},

  // less than or equal
  {true, "a <= 'b'"},
  {true, "a =< 'b'"},
  {true, "a lte 'b'"},
  {true, "(a lte 'b')"},
  {true, "(a <= 'b')"},
  {true, "(a =< 'b')"},

  // equal
  {true, "a eq 122"},
  {true, "a == 150"},
  {true, "a = 0"},
  {true, "a eq '122'"},
  {true, "a == '150'"},
  {true, "a = '0'"},

  // not equal
  {true, "a neq 122"},
  {true, "a <> 150"},
  {true, "a != 0"},
  {true, "a neq '122'"},
  {true, "a <> '150'"},
  {true, "a != '0'"},

  // [NOT] LIKE
  {true, "a like 'b'"},
  {true, "a not like 'b'"},
  {true, "(a like 'b')"},
  {true, "(a not like 'b')"},
  {true, "a like '*b'"},
  {true, "a not like 'b#32'"},
  {true, "(a like 'bdfwdf')"},
  {true, "(a not like 'b888..///')"},
};

namespace sql {
namespace qi    = boost::spirit::qi;
using namespace qi;
using boost::spirit::repository::distinct;
template <typename Iterator, typename Signature= unused_type(), typename Skipper= unused_type, typename Locals= unused_type >
struct parser_t: grammar<Iterator, Signature, Skipper, Locals>{

  parser_t():
    parser_t::base_type(start_)
  {
    //---------------------------------------------------------------------------
    identifier_= qi::no_case[ qi::lexeme[ (qi::alpha >> *(qi::alnum | '_')) ] ] ;
    str_val_= qi::lexeme[ qi::char_('\'') >>  +( qi::char_ - qi::char_('\'') ) >> qi::char_('\'')];
    num_val_ = +digit;
    value_= ( num_val_ | str_val_ );
    in_value_= lit("(") >> ((num_val_ % ',') | (str_val_ % ',')) >> lit(")");

    //---------------------------------------------------------------------------
    AND= (no_case[lit("AND")] |
        lit("&&") |
        distinct(lit("&"))[lit("&")]);

    OR= (no_case[lit("OR")] |
        lit("||") |
        distinct(lit("|"))[lit("|")]);


    NOT= (no_case[lit("NOT")] |
        distinct(lit("="))[lit("!")]);

    BETWEEN= no_case[lit("BETWEEN")];
    IN= no_case[lit("IN")];
    LIKE= no_case[lit("LIKE")];
    IS= (no_case[lit("IS")]);

    TRUE= no_case[lit("TRUE")];
    FALSE= no_case[lit("FALSE")];
    truth_value_= ( TRUE | FALSE);

    //---------------------------------------------------------------------------
    left_paren_= lit("(");
    right_paren_= lit(")");
    left_bracket_= lit("[");
    right_bracket_= lit("]");
    comma_= lit(",");

    // eq, =, ==
    equals_operator_=
        no_case[lit("eq")] |
        distinct(lit(">") | lit("<")| lit("="))[lit("=")] |
        lit("==");

    // neq, !=, <>
    not_equals_operator_=
        no_case[lit("neq")] |
        lit("!=") |
        lit("<>");

    // lt, <
    less_than_operator_=
        distinct(no_case[lit("e")])[no_case[lit("lt")]] |
        distinct('=')[lit("<")];

    // gt, >
    greater_than_operator_=
        distinct(no_case[lit("e")])[no_case[lit("gt")]] |
        distinct('=')[lit(">")];

    // lte, <=, =<
    less_than_or_equals_operator_=
        no_case[lit("lte")] |
        lit("<=") |
        lit("=<");

    //gte, >=, =>
    greater_than_or_equals_operator_=
        no_case[lit("gte")] |
        lit(">=") |
        lit("=>");

    //---------------------------------------------------------------------------
    between_predicate_= identifier_  >> -( NOT ) >> BETWEEN >> ( left_paren_ | left_bracket_ ) >> value_ >> comma_ >> value_ >> ( right_paren_ | right_bracket_) ;
    in_predicate_= identifier_  >> -( NOT ) >> IN >> in_value_;
    like_predicate_= identifier_ >> -( NOT ) >> LIKE >> str_val_;
    comp_op_= equals_operator_ | not_equals_operator_ | less_than_operator_ | greater_than_operator_ | less_than_or_equals_operator_ | greater_than_or_equals_operator_;
    comparison_predicate_= identifier_ >> comp_op_  >> value_;
    predicate_= comparison_predicate_ | between_predicate_ | in_predicate_ | like_predicate_ ;

    //---------------------------------------------------------------------------
    start_  = or_.alias();

    or_  = and_ >> *( OR  >> and_);
    and_ = not_ >> *( AND >> not_);
    not_ = (NOT >> simple_) | simple_;
    simple_ = (left_paren_ >> start_ >> right_paren_) | predicate_;

/*
    boolean_predicand_=left_paren_  >> boolean_value_expression_  >> right_paren_;
    boolean_primary_= predicate_ | boolean_predicand_;
    boolean_test_= boolean_primary_  >> -( IS >> -( NOT ) >> truth_value_ );
    boolean_factor_= -( NOT ) >> boolean_test_;
    boolean_term_= boolean_factor_ | (boolean_term_  >> AND >> boolean_factor_);
    boolean_value_expression_= boolean_term_ | ( boolean_value_expression_ >> OR >> boolean_term_);

    start_= boolean_value_expression_;
*/
    //start_= predicate_ | (start_ >> OR >> predicate_);

    //---------------------------------------------------------------------------
  }

  rule<Iterator, Signature, Skipper, Locals> start_;

  rule<Iterator, Signature, Skipper, Locals>
  boolean_value_expression_,
  boolean_term_,
  boolean_factor_,
  boolean_test_,
  boolean_primary_,
  boolean_predicand_
  ;

  rule<Iterator, Signature, Skipper, Locals>
  or_,
  and_,
  not_,
  simple_
  ;

  rule<Iterator, Signature, Skipper, Locals>
  predicate_,
  comparison_predicate_,
  comp_op_,
  between_predicate_,
  in_predicate_,
  like_predicate_
  ;

  rule<Iterator, Signature, Skipper, Locals>
  equals_operator_,
  not_equals_operator_,
  less_than_operator_,
  greater_than_operator_,
  less_than_or_equals_operator_,
  greater_than_or_equals_operator_
  ;

  rule<Iterator, Signature, Skipper, Locals>
  AND, OR, IS, NOT,
  BETWEEN, IN, LIKE,
  TRUE, FALSE, truth_value_
  ;

  rule<Iterator, Signature, Skipper, Locals>
  left_paren_,
  right_paren_,
  left_bracket_,
  right_bracket_,
  comma_
  ;

  rule<Iterator, Signature, Skipper, Locals>
  identifier_,
  str_val_,
  num_val_,
  value_,
  in_value_
  ;

};
}

int main()
{
  namespace qi    = boost::spirit::qi;
  using namespace qi;

  // Single predicates
  for(auto testcase: testcases_)
  {
    auto expected_result{std::get<0>(testcase)};
    auto expr{std::get<1>(testcase)};

    auto error(false);
    std::cout << "---------------------------------------------------------------------" << std::endl <<
                 "Expr: " << expr << std::endl;

    auto f(std::begin(expr)), l(std::end(expr));

    sql::parser_t<decltype(f),
        unused_type(),
        qi::space_type,
        unused_type> parser;

    try
    {
      bool ok = qi::phrase_parse(f, l, parser, boost::spirit::qi::space);

      if (!ok)
      {
        error= true;
        std::cerr << "invalid expr\n";
      }
      else
        std::cout << "result: " /*<< result */<< "\n";

    } catch (const qi::expectation_failure<decltype(f)>& e)
    {
      error= true;
      std::cerr << "expectation_failure at '" << std::string(e.first, e.last) << "'\n";
    } catch(...)
    {
      error= true;
      std::cerr << "Excepion!" << std::endl;
    }

    if (f!=l)
    {
      error= true;
      std::cerr << "unparsed: '" << std::string(f,l) << "'\n";
    }

    if(expected_result != !error && std::getchar() == 'q') break;
  }

  // OR/AND predicates
  auto terminate(false);
  for(auto testcase1: testcases_)
  {
    auto expected_result1{std::get<0>(testcase1)};
    auto expr1{std::get<1>(testcase1)};
    for(auto testcase2: testcases_)
    {
      auto expected_result2{std::get<0>(testcase2)};
      auto expr2{std::get<1>(testcase2)};

      auto expr(expr1 + "OR" + expr2);
      auto expected_result(expected_result1 && expected_result2);

      auto error(false);
      std::cout << "---------------------------------------------------------------------" << std::endl <<
                   "Expr: " << expr << std::endl;

      auto f(std::begin(expr)), l(std::end(expr));

      sql::parser_t<decltype(f),
          unused_type(),
          qi::space_type,
          unused_type> parser;

      try
      {
        bool ok = qi::phrase_parse(f, l, parser, boost::spirit::qi::space);

        if (!ok)
        {
          error= true;
          std::cerr << "invalid expr\n";
        }
        else
          std::cout << "result: " /*<< result */<< "\n";

      } catch (const qi::expectation_failure<decltype(f)>& e)
      {
        error= true;
        std::cerr << "expectation_failure at '" << std::string(e.first, e.last) << "'\n";
      } catch(...)
      {
        error= true;
        std::cerr << "Excepion!" << std::endl;
      }

      if (f!=l)
      {
        error= true;
        std::cerr << "unparsed: '" << std::string(f,l) << "'\n";
      }

      if(expected_result != !error && std::getchar() == 'q')
      {
        terminate=true;
        break;
      }
    }
    if(terminate)break;
  }
  return 0;
}
0

There are 0 answers