C# Evaluate Tree expressions where leaf nodes are sets of integers

79 views Asked by At

I need to evaluate in C# ad-hoc queries a where value nodes of a tree expressions are sets of numbers.

A sample expression Set-A NOT SET-B AND (SET-C OR SET-D)

AND = INTERSECT
OR = UNION
NOT = EXCEPT

The expressions can get quite complex - my data sets are drawn from the respondents who answer surveys with many questions. I want to end up with the set of respondents who answered specific questions in specific ways.

I've tried building a tree style evaluator but while it works in most cases it fails in others (mostly if I put a NOT in different places).

Is there anyone who has done this before and come up with an elegant solution they would like to share? Preferably in C# - obviously I use LINQ to do the set operations, I need a way to build and evaluate trees that combine multiple sets in different ways.

Here is my current code (added in response to comment)

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Z.Expressions;
using DIPEF = DataImport.EF;

namespace DataImport.Models
{
    public enum OPMODEL { NULL = 0, AND = 1, OR = 2, NOT = 3, ORNOT = 4 }

    public class DemographicOption
    {
        public long OptionId { get; set; }
        public long QuestionId { get; set; }
        public string QuestionText { get; set; }
        public string OptionText { get; set; }
        public OPMODEL Operation { get; set; }
        public bool LParen { get; set; }
        public bool RParen { get; set; }
        public string Logic { get; set; }
 
        public DemographicOption()
        {
            Operation = OPMODEL.NULL;
            QuestionId = 0;
        }
    }

    public class DemographicTree
    {
        public int projectId { get; set; }
        public OPMODEL Operation;  //root node
        public long OptionId;
        public long QuestionId;
        public string Logic;
        public string OptionText;
        public DemographicTree LChild; // Complex Node
        public DemographicTree RChild; // Complex Node
        public Stack<OPMODEL> opStack = new Stack<OPMODEL>();
        public bool Not { get; set; }
        public DemographicTree()
        {
            Operation = OPMODEL.NULL;
            LChild = null;
            RChild = null;
            Not = false;
        }

        private DemographicTree LChildTree(ref List<DemographicOption> list)
        {
            if (list == null || list.Count == 0)
                return null;
            DemographicOption o = list[0];
            DemographicTree T = new DemographicTree();
            T.Operation = o.Operation;
            T.LChild = new DemographicTree();
            T.LChild.QuestionId = o.QuestionId;
            T.LChild.Logic = o.Logic;
            T.LChild.OptionId = o.OptionId;
            T.LChild.OptionText = o.OptionText;
            list.RemoveAt(0);
            if (list.Count > 0)
            {
                opStack.Push(list[0].Operation);
                T.RChild = new DemographicTree(list);
            }
            return T;
        }

        public DemographicTree(List<DemographicOption> list)
        {
            if (list == null || list.Count == 0)
                return;
            DemographicOption o = list[0];
            DemographicOption ultima = list.Last();
            if (ultima.RParen == true && ultima.Operation == OPMODEL.NOT)
                Not = true;
            Operation = o.Operation;
            if (o.RParen == true)
            {
                OptionId = o.OptionId;
                QuestionId = o.QuestionId;
                Logic = o.Logic;
                OptionText = o.OptionText;

                opStack.Push(o.Operation);
                list.RemoveAt(0);
                return;
            }
            if (o.Operation == OPMODEL.NULL && list.Count == 1)
            {
                OptionId = o.OptionId;
                QuestionId = o.QuestionId;
                Logic = o.Logic;
                OptionText = o.OptionText;
                list.RemoveAt(0);
                return;
            }
        //    Operation = o.Operation;
            opStack.Push(Operation);
            if (o.LParen == true)
            {
                LChild = LChildTree(ref list);
                if (list == null || list.Count == 0)
                    return;
            }
            else
            {
                LChild = new DemographicTree();
                LChild.OptionId = o.OptionId;
                LChild.QuestionId = o.QuestionId;
                LChild.Logic = o.Logic;
                LChild.OptionText = o.OptionText;
                list.RemoveAt(0);
            }
            if (list.Count == 1 )
            {
                if (o.RParen == true)
                    LChild.Operation = o.Operation;
                o = list[0];
                RChild = new DemographicTree();
                RChild.OptionId = o.OptionId;
                RChild.QuestionId = o.QuestionId;
                RChild.Logic = o.Logic;
                RChild.OptionText = o.OptionText;
             //   Operation = opStack.Pop();
            }
            if (list.Count > 1)
            {
                Operation = opStack.Pop();
                RChild = new DemographicTree(list);
            }

        }

        public static bool EvaluateLogicalExpression(string logicalExpression)
        {
            System.Data.DataTable table = new System.Data.DataTable();
            table.Columns.Add("", typeof(bool));
            table.Columns[0].Expression = logicalExpression;

            System.Data.DataRow r = table.NewRow();
            table.Rows.Add(r);
            bool result = (Boolean)r[0];
            return result;
        }

        public long findFirstOptionId(DemographicTree dt)
        {
            if (dt.OptionId > 0)
                return dt.OptionId;
            if (dt.RChild != null)
                return findFirstOptionId(dt.RChild);
            if (dt.LChild != null)
                return findFirstOptionId(dt.LChild);
            return 0;
        }

        public  String toString(DemographicTree dt)
        {
            string s = "";
            if (dt == null) return s;
            using (var entities = new DIPEF.DataImportDB())
            {
                if (dt.Operation == OPMODEL.NULL && dt.QuestionId == 0)
                {
                    if (dt.OptionId > 0)
                        s = entities.Options.FirstOrDefault(O => O.OptionId == dt.OptionId).Name;
                    return s;
                }
                else if (dt.Operation == OPMODEL.NULL && dt.QuestionId > 0 && String.IsNullOrEmpty(dt.Logic))
                {
                    s = entities.Questions.FirstOrDefault(Q => Q.QuestionId == dt.QuestionId).Name;
                    return s;
                }
                else if (dt.QuestionId > 0 && !String.IsNullOrEmpty(dt.Logic))
                {
                    var q = entities.Questions.FirstOrDefault(Q => Q.QuestionId == dt.QuestionId);
                    s +=  string.Format(" {0} {1}", q.Name, dt.Logic);
                    if (dt.Operation == OPMODEL.NULL)
                        return s;
                }
                switch (dt.Operation)
                {
                    case OPMODEL.AND:
                        {
                            s += " " + toString(dt.LChild);
                            if (dt.RChild == null)
                                return s;
                            s += " " + toString(dt.RChild);
                            return s;
                        }
                    case OPMODEL.OR:
                        {
                            s += " " + toString(dt.LChild);
                            if (dt.RChild == null)
                                return s;
                            s += " " + toString(dt.RChild);
                            return s;
                        }
                    case OPMODEL.NOT:
                        {
                            s += " !" + toString(dt.LChild);
                            if (dt.RChild == null)
                                return s;
                            s += " " + toString(dt.RChild);
                            return s;
                        }
                    case OPMODEL.ORNOT:
                        {
                            s += " OR !" + toString(dt.LChild);
                            if (dt.RChild == null)
                                return s;
                            s += " " + toString(dt.RChild);
                            return s;
                        }

                }
            }
            return s;
        }
        public List<long> Evaluate(DemographicTree dt, bool NOT)
        {
            var respondents = Evaluate(dt);
            if (NOT == true)
            {
                var optionId = findFirstOptionId(dt);
                using (var entities = new DIPEF.DataImportDB())
                {
                    var option = entities.Options.FirstOrDefault(O => O.OptionId == optionId);
                    if (option != null)
                    {
                        var resp = entities.Responses.Where(R => R.QuestionId == option.QuestionId).Select(r => r.RespondentId).Distinct().ToList();
                        respondents = resp.Except(respondents).ToList();
                    }
                }
            }
            return respondents;
        }
        public List<long> Evaluate(DemographicTree dt)
        {           
            Console.WriteLine("{0}", dt.toString(dt));
            using (var entities = new DIPEF.DataImportDB())
            {
                if (dt.Operation == OPMODEL.NULL && dt.QuestionId == 0)
                {
                    var respondents = entities.Responses.Where(R=>R.OptionId == dt.OptionId).Select(r => r.RespondentId).Distinct().ToList();
                   
                    return respondents;
                }
                else if (dt.Operation == OPMODEL.NULL && dt.QuestionId > 0 && dt.OptionId==0 && String.IsNullOrEmpty(dt.Logic))
                {
                    var respondents = entities.Responses.Where(R => R.QuestionId == dt.QuestionId).Select(r => r.RespondentId).Distinct().ToList();
                    return respondents;
                }
               
                else if ( dt.QuestionId > 0 && !String.IsNullOrEmpty(dt.Logic))
                {
                    var respondents = new List<long>();
                    var responses = entities.Responses.Where(R => R.QuestionId == dt.QuestionId).ToList();
                    if (dt.Logic.Trim().ToUpper() == "N/A" || dt.Logic.ToLower().Trim() == "is null")
                        {
                        int projectId = responses.First().ProjectId;
                        List<long> allRespondents = entities.Respondents.Where(R => R.ProjectId == projectId).Select(A => A.RespondentId).ToList();
                        List<long> qRespondents = responses.Select(B => B.RespondentId).ToList();
                        respondents = allRespondents.Except(qRespondents).ToList();
                    }
                    else foreach(DIPEF.Response r in responses)
                    {
                       
                        var o = entities.Options.FirstOrDefault(O => O.OptionId == r.OptionId);

                        string expression = string.Format("{0} {1}", o.Value, dt.Logic);
                        if (Regex.IsMatch(dt.Logic, @"^[a-zA-Z]+$"))
                        {
                            if (o.Name.ToLower().Contains(dt.Logic.ToLower()))
                            {
                                respondents.Add(r.RespondentId);
                            }
                        }
                        else
                        {
                            if (EvaluateLogicalExpression(expression) == true)
                                if (!respondents.Contains(r.RespondentId))
                                    respondents.Add(r.RespondentId);
                        }

                    }
                    if (dt.Operation == OPMODEL.NULL)
                        return respondents;
                }
                Console.WriteLine(String.Format("{0} {1} {2}", dt.LChild == null ? "null":dt.LChild.toString(dt.LChild), dt.Operation, dt.RChild == null ? "null": dt.RChild.toString(dt.RChild)));
                switch (dt.Operation)
                {
                    case OPMODEL.AND:
                        {
                            var lResp = Evaluate(dt.LChild);
                            if (dt.RChild == null)
                                return lResp;                            
                            lResp = lResp.Intersect(Evaluate(dt.RChild)).ToList();
                            return lResp.Distinct().ToList();
                        }
                    case OPMODEL.OR:
                        {
                            var lResp = Evaluate(dt.LChild);
                            if (dt.RChild == null)
                                return lResp;
                            var rResp = Evaluate(dt.RChild);
                            var resp = lResp.Union(rResp).ToList();
                            return resp;
                        }
                    case OPMODEL.NOT:
                        {
                            var lResp = Evaluate(dt.LChild);
                            if (dt.RChild == null)
                                return lResp;
                            var rResp = Evaluate(dt.RChild);
                            
                            var resp = lResp.Except(rResp).ToList();                     
                            return resp;
                        }
                    case OPMODEL.ORNOT:
                        {
                            var lResp = Evaluate(dt.LChild);
                            if (dt.RChild == null)
                                return lResp;
                            var rResp = Evaluate(dt.RChild);
                            var resp = lResp.Except(rResp).ToList();
                            return resp;
                        }

                }
                return new List<long>();
            }
    }
}
}
1

There are 1 answers

0
user2296345 On BEST ANSWER

OK - My original code was a hack and I forced myself to write a much more generic set expression evaluator. I still input an array of 'Expression' objects but very much less specific to my original attempt. Here is the result and how it gets used:

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;

namespace SetExpressions
{
    public class Expression
    {
        public List<long> Value { get; set; }

        public char Op { get;set; }
        public bool lParen { get; set; }
        public bool rParen { get; set; }
      
        public Expression ( List<long> value, char op, bool lp, bool rp)
        {
            Op = op;
            Value = value;
            lParen = lp;
            rParen = rp;
        }
    }
    class SetExpressionEvaluator
    {

        Stack<List<long>> valueStack = new Stack<List<long>>();
        Stack<char> operatorStack = new Stack<char>();
        private void EvaluatePartial()
        {
            char op = operatorStack.Pop();
            while (op != '(')
            {
                var right = valueStack.Pop();
                var left = valueStack.Pop();
                var newval = Apply(op, left, right);
                valueStack.Push(newval);
                op = operatorStack.Pop();
            }
        }
        public List<long> Apply(char op, List<long> left, List<long> right)
        {
            List<long> res = new List<long>();
            switch (op)
            {
                case '&':
                    res = left.Intersect(right).ToList(); break;
                case '|':
                    res = left.Union(right).ToList(); break;
                case '!':
                    res = left.Except(right).ToList(); break;
            }
            return res;
        }
        public List<long> Evaluate(Expression [] expressions)
        {
            if (expressions.Length == 0)
            {
                return new List<long>();
            }
            
            operatorStack.Clear();
            valueStack.Clear();

            foreach (Expression expression in expressions)
            {
                 valueStack.Push(expression.Value);                                                       
                if (expression.lParen)
                {
                    operatorStack.Push('(');
                }
                if (expression.Op != '\0')
                    operatorStack.Push(expression.Op);
                if (expression.rParen)
                {
                    EvaluatePartial();
                }            
            }

            while (operatorStack.Count > 0)
            {
                var right = valueStack.Pop();
                var left = valueStack.Pop();
                var newval = Apply(operatorStack.Pop(), left, right);
                valueStack.Push(newval);
            }
            return valueStack.Pop();
        }      
    }
}

The Test Code:

using SetExpressions;
using System;
using System.Collections.Generic;

namespace ExpressionEvaluator
{
    class Program
    {
        static void Main(string[] args)
        {
            SetExpressionEvaluator x = new SetExpressionEvaluator();
            var A = new List<long> { 1, 2, 3 };
            var B = new List<long> { 3, 4, 5 };
            var C = new List<long> { 5, 6, 7 };

            // A & B
            List<Expression> list = new List<Expression>();
            list.Add(new Expression(A, '&', false, false));
            list.Add(new Expression(B, '\0', false, false));
            var X = x.Evaluate(list.ToArray());
            Console.WriteLine(String.Join(',', X));
            list = new List<Expression>();
            list.Add(new Expression(A, '&', false, false));
            list.Add(new Expression(B, '|', true, false));
            list.Add(new Expression(C, '\0', false, true));
            X = x.Evaluate(list.ToArray());
            Console.WriteLine(String.Join(',', X));
        }
    }

}