I need to evaluate ad-hoc queries in C#, where the value nodes of an expression tree are sets of numbers.
A sample expression: SET-A NOT SET-B AND (SET-C OR SET-D)
AND = INTERSECT
OR = UNION
NOT = EXCEPT
The expressions can get quite complex - my data sets are drawn from the respondents who answer surveys with many questions. I want to end up with the set of respondents who answered specific questions in specific ways.
I've tried building a tree style evaluator but while it works in most cases it fails in others (mostly if I put a NOT in different places).
Is there anyone who has done this before and come up with an elegant solution they would like to share? Preferably in C# - obviously I use LINQ to do the set operations, I need a way to build and evaluate trees that combine multiple sets in different ways.
Here is my current code (added in response to comment)
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Z.Expressions;
using DIPEF = DataImport.EF;
namespace DataImport.Models
{
    /// <summary>
    /// Operator attached to an option / tree node. The numeric values are fixed
    /// explicitly so they stay stable if members are reordered.
    /// </summary>
    public enum OPMODEL
    {
        /// <summary>No operator; also the default value of the enum.</summary>
        NULL = 0,
        AND = 1,
        OR = 2,
        NOT = 3,
        ORNOT = 4
    }
    /// <summary>
    /// One element of the flattened expression that the
    /// <c>DemographicTree(List&lt;DemographicOption&gt;)</c> constructor consumes.
    /// </summary>
    public class DemographicOption
    {
        /// <summary>Option identifier copied onto the tree node built from this element.</summary>
        public long OptionId { get; set; }

        /// <summary>Question identifier copied onto the tree node built from this element.</summary>
        public long QuestionId { get; set; }

        public string QuestionText { get; set; }

        public string OptionText { get; set; }

        /// <summary>Operator carried by this element.</summary>
        public OPMODEL Operation { get; set; }

        /// <summary>Parenthesis flag read by the tree builder (presumably "group opens here").</summary>
        public bool LParen { get; set; }

        /// <summary>Parenthesis flag read by the tree builder (presumably "group closes here").</summary>
        public bool RParen { get; set; }

        /// <summary>Free-form logic text copied onto the tree node built from this element.</summary>
        public string Logic { get; set; }

        /// <summary>Creates an element with no operator and no question assigned.</summary>
        public DemographicOption()
        {
            this.QuestionId = 0;
            this.Operation = OPMODEL.NULL;
        }
    }
    public class DemographicTree
    {
        public int projectId { get; set; }
        public OPMODEL Operation;  //root node
        public long OptionId;
        public long QuestionId;
        public string Logic;
        public string OptionText;
        public DemographicTree LChild; // Complex Node
        public DemographicTree RChild; // Complex Node
        public Stack<OPMODEL> opStack = new Stack<OPMODEL>();
        public bool Not { get; set; }
        /// <summary>
        /// Creates an empty node: no operator, no children, not negated.
        /// </summary>
        public DemographicTree()
        {
            this.Not = false;
            this.LChild = null;
            this.RChild = null;
            this.Operation = OPMODEL.NULL;
        }
        /// <summary>
        /// Builds a sub-tree from the first element of <paramref name="list"/>: the
        /// element becomes the left leaf, and any remaining elements are handed to the
        /// list constructor to form the right child.
        /// </summary>
        /// <param name="list">Flattened expression; the consumed elements are removed from it.</param>
        /// <returns>The new sub-tree, or null when the list is null or empty.</returns>
        private DemographicTree LChildTree(ref List<DemographicOption> list)
        {
            bool nothingToConsume = list == null || list.Count == 0;
            if (nothingToConsume)
            {
                return null;
            }

            DemographicOption head = list[0];
            list.RemoveAt(0);

            DemographicTree leaf = new DemographicTree
            {
                QuestionId = head.QuestionId,
                Logic = head.Logic,
                OptionId = head.OptionId,
                OptionText = head.OptionText
            };
            DemographicTree subtree = new DemographicTree
            {
                Operation = head.Operation,
                LChild = leaf
            };

            if (list.Count != 0)
            {
                // Remember the operator of the next element on this instance's stack
                // before the remainder of the list is turned into the right child.
                opStack.Push(list[0].Operation);
                subtree.RChild = new DemographicTree(list);
            }

            return subtree;
        }
        /// <summary>
        /// Builds this node (and, recursively, its children) from the front of
        /// <paramref name="list"/>. Elements are removed from the list as they are
        /// consumed, so the caller's list is modified.
        /// </summary>
        /// <param name="list">
        /// Flattened expression. When null or empty the constructor returns immediately
        /// and the node keeps its field defaults.
        /// </param>
        public DemographicTree(List<DemographicOption> list)
        {
            if (list == null || list.Count == 0)
                return;
            DemographicOption o = list[0];
            DemographicOption ultima = list.Last();
            // The node is flagged as negated when the LAST element of the list both
            // carries RParen and has the NOT operator.
            if (ultima.RParen == true && ultima.Operation == OPMODEL.NOT)
                Not = true;
            Operation = o.Operation;
            // Case 1: the first element carries RParen -> this node becomes a leaf
            // holding that element's data (its operator is kept and also pushed).
            if (o.RParen == true)
            {
                OptionId = o.OptionId;
                QuestionId = o.QuestionId;
                Logic = o.Logic;
                OptionText = o.OptionText;
                opStack.Push(o.Operation);
                list.RemoveAt(0);
                return;
            }
            // Case 2: a single element without an operator -> plain leaf.
            if (o.Operation == OPMODEL.NULL && list.Count == 1)
            {
                OptionId = o.OptionId;
                QuestionId = o.QuestionId;
                Logic = o.Logic;
                OptionText = o.OptionText;
                list.RemoveAt(0);
                return;
            }
        //    Operation = o.Operation;
            // Case 3: operator node. Remember the operator; it may be popped again below.
            opStack.Push(Operation);
            if (o.LParen == true)
            {
                // Parenthesised start: LChildTree consumes the first element (and, via
                // a nested constructor call, possibly the rest of the list). It may also
                // push the following element's operator onto this instance's opStack.
                LChild = LChildTree(ref list);
                if (list == null || list.Count == 0)
                    return;
            }
            else
            {
                // Plain start: the first element becomes the left leaf.
                LChild = new DemographicTree();
                LChild.OptionId = o.OptionId;
                LChild.QuestionId = o.QuestionId;
                LChild.Logic = o.Logic;
                LChild.OptionText = o.OptionText;
                list.RemoveAt(0);
            }
            if (list.Count == 1 )
            {
                // NOTE(review): 'o' is still the first element here, and the
                // o.RParen == true case already returned above, so this check looks
                // unreachable unless RParen is changed elsewhere - confirm.
                if (o.RParen == true)
                    LChild.Operation = o.Operation;
                // Exactly one element left: it becomes the right leaf. Its own operator
                // is not copied, and the element is NOT removed from the list here.
                o = list[0];
                RChild = new DemographicTree();
                RChild.OptionId = o.OptionId;
                RChild.QuestionId = o.QuestionId;
                RChild.Logic = o.Logic;
                RChild.OptionText = o.OptionText;
             //   Operation = opStack.Pop();
            }
            if (list.Count > 1)
            {
                // More than one element left: take the operator from the top of the
                // stack (the one pushed above, or the one LChildTree pushed after it)
                // and build the right child from the remainder of the list.
                Operation = opStack.Pop();
                RChild = new DemographicTree(list);
            }
        }
        /// <summary>
        /// Evaluates a boolean expression (for example <c>"5 &gt; 3"</c>) by assigning
        /// it as the computed-column expression of a one-row, one-column
        /// <see cref="System.Data.DataTable"/> and reading the resulting cell.
        /// </summary>
        /// <param name="logicalExpression">Expression text in DataColumn expression syntax.</param>
        /// <returns>The boolean value the expression evaluates to.</returns>
        /// <remarks>
        /// Expressions that System.Data rejects raise its exceptions unchanged. A cell
        /// that does not hold a boolean makes the final cast throw
        /// <see cref="InvalidCastException"/>.
        /// </remarks>
        public static bool EvaluateLogicalExpression(string logicalExpression)
        {
            // DataTable is IDisposable; scope it so it is released deterministically
            // instead of being left to the garbage collector on every call.
            using (System.Data.DataTable table = new System.Data.DataTable())
            {
                table.Columns.Add("", typeof(bool));
                table.Columns[0].Expression = logicalExpression;
                System.Data.DataRow r = table.NewRow();
                table.Rows.Add(r);
                // Read the computed value while the table is still alive.
                return (Boolean)r[0];
            }
        }
        /// <summary>
        /// Returns the first positive OptionId found starting at <paramref name="dt"/>.
        /// The node itself is checked first; otherwise the search continues into the
        /// right child if there is one, and only into the left child when there is no
        /// right child.
        /// </summary>
        /// <param name="dt">Node to start from; must not be null.</param>
        /// <returns>The option id found, or 0 when the followed path has none.</returns>
        public long findFirstOptionId(DemographicTree dt)
        {
            long ownId = dt.OptionId;
            if (ownId > 0)
            {
                return ownId;
            }

            // Right child takes precedence; the left child is only a fallback for a
            // missing right child, not for a right subtree that yields 0.
            DemographicTree next = dt.RChild ?? dt.LChild;
            return next == null ? 0 : findFirstOptionId(next);
        }
        /// <summary>
        /// Renders a node and its children as text, looking option and question names
        /// up in the database.
        /// </summary>
        /// <param name="dt">Node to render; null yields an empty string.</param>
        /// <returns>
        /// For leaves: the option name, the question name, or " name logic".
        /// For operator nodes: the rendered left child (prefixed with "!" for NOT and
        /// "OR !" for ORNOT) followed by the rendered right child, if any.
        /// </returns>
        public String toString(DemographicTree dt)
        {
            if (dt == null) return "";

            string text = "";
            using (var entities = new DIPEF.DataImportDB())
            {
                bool noOperator = dt.Operation == OPMODEL.NULL;
                bool hasLogic = !String.IsNullOrEmpty(dt.Logic);

                if (noOperator && dt.QuestionId == 0)
                {
                    // Pure option leaf.
                    if (dt.OptionId > 0)
                        text = entities.Options.FirstOrDefault(O => O.OptionId == dt.OptionId).Name;
                    return text;
                }

                if (noOperator && dt.QuestionId > 0 && !hasLogic)
                {
                    // Question leaf without extra logic.
                    return entities.Questions.FirstOrDefault(Q => Q.QuestionId == dt.QuestionId).Name;
                }

                if (dt.QuestionId > 0 && hasLogic)
                {
                    // Question with logic; may still carry an operator handled below.
                    var question = entities.Questions.FirstOrDefault(Q => Q.QuestionId == dt.QuestionId);
                    text += string.Format(" {0} {1}", question.Name, dt.Logic);
                    if (noOperator)
                        return text;
                }

                // All operator cases share one shape and differ only in the text put
                // in front of the left child.
                string leftPrefix;
                switch (dt.Operation)
                {
                    case OPMODEL.AND:
                    case OPMODEL.OR:
                        leftPrefix = " ";
                        break;
                    case OPMODEL.NOT:
                        leftPrefix = " !";
                        break;
                    case OPMODEL.ORNOT:
                        leftPrefix = " OR !";
                        break;
                    default:
                        return text;
                }

                text += leftPrefix + toString(dt.LChild);
                if (dt.RChild != null)
                    text += " " + toString(dt.RChild);
                return text;
            }
        }
        /// <summary>
        /// Evaluates <paramref name="dt"/> and optionally complements the result.
        /// </summary>
        /// <param name="dt">Tree to evaluate.</param>
        /// <param name="NOT">
        /// When true, the result is subtracted from the set of respondents who have a
        /// response to the question owning the option returned by
        /// <c>findFirstOptionId</c>. If that option cannot be found the plain result
        /// is returned.
        /// </param>
        /// <returns>Respondent ids.</returns>
        public List<long> Evaluate(DemographicTree dt, bool NOT)
        {
            List<long> matched = Evaluate(dt);
            if (!NOT)
            {
                return matched;
            }

            long firstOptionId = findFirstOptionId(dt);
            using (var entities = new DIPEF.DataImportDB())
            {
                var option = entities.Options.FirstOrDefault(O => O.OptionId == firstOptionId);
                if (option == null)
                {
                    return matched;
                }

                // Universe for the complement: everyone who answered that question.
                var answered = entities.Responses
                    .Where(R => R.QuestionId == option.QuestionId)
                    .Select(r => r.RespondentId)
                    .Distinct()
                    .ToList();
                return answered.Except(matched).ToList();
            }
        }
        /// <summary>
        /// Recursively resolves a tree node to the list of respondent ids it selects.
        /// </summary>
        /// <param name="dt">Node to evaluate. A null node yields an empty list.</param>
        /// <returns>
        /// Leaves: respondents with a response for the option, for the question, or
        /// whose answer satisfies the node's Logic text ("N/A" / "is null" selects the
        /// project's respondents that have no response to the question).
        /// Operator nodes: AND = intersect, OR = union, NOT and ORNOT = left except
        /// right; with no right child the left result is returned as is.
        /// </returns>
        /// <remarks>
        /// Writes the rendered node to the console before evaluating it.
        /// </remarks>
        public List<long> Evaluate(DemographicTree dt)
        {
            // toString already tolerates a null node, and the right child is
            // null-checked below; treat a missing node as the empty set instead of
            // failing with a NullReferenceException.
            if (dt == null)
                return new List<long>();

            Console.WriteLine("{0}", dt.toString(dt));
            using (var entities = new DIPEF.DataImportDB())
            {
                if (dt.Operation == OPMODEL.NULL && dt.QuestionId == 0)
                {
                    // Option leaf: everyone who chose this option.
                    return entities.Responses.Where(R => R.OptionId == dt.OptionId).Select(r => r.RespondentId).Distinct().ToList();
                }
                else if (dt.Operation == OPMODEL.NULL && dt.QuestionId > 0 && dt.OptionId == 0 && String.IsNullOrEmpty(dt.Logic))
                {
                    // Question leaf: everyone who answered this question.
                    return entities.Responses.Where(R => R.QuestionId == dt.QuestionId).Select(r => r.RespondentId).Distinct().ToList();
                }
                else if (dt.QuestionId > 0 && !String.IsNullOrEmpty(dt.Logic))
                {
                    var respondents = new List<long>();
                    var responses = entities.Responses.Where(R => R.QuestionId == dt.QuestionId).ToList();
                    if (dt.Logic.Trim().ToUpper() == "N/A" || dt.Logic.ToLower().Trim() == "is null")
                    {
                        // NOTE(review): First() throws when the question has no
                        // responses at all, because the project is derived from a
                        // response - confirm whether that case can occur.
                        int projectId = responses.First().ProjectId;
                        List<long> allRespondents = entities.Respondents.Where(R => R.ProjectId == projectId).Select(A => A.RespondentId).ToList();
                        List<long> qRespondents = responses.Select(B => B.RespondentId).ToList();
                        respondents = allRespondents.Except(qRespondents).ToList();
                    }
                    else
                    {
                        // Loop-invariant: whether Logic is a plain word (name search)
                        // or an expression fragment appended to the option value.
                        bool isNameSearch = Regex.IsMatch(dt.Logic, @"^[a-zA-Z]+$");
                        string needle = dt.Logic.ToLower();
                        // Tracks ids already added so both match kinds yield each
                        // respondent once, without a linear List.Contains per response.
                        var seen = new HashSet<long>();

                        foreach (DIPEF.Response r in responses)
                        {
                            var o = entities.Options.FirstOrDefault(O => O.OptionId == r.OptionId);
                            if (o == null)
                                continue; // response without a matching option row cannot match

                            bool matches = isNameSearch
                                ? o.Name.ToLower().Contains(needle)
                                : EvaluateLogicalExpression(string.Format("{0} {1}", o.Value, dt.Logic));

                            if (matches && seen.Add(r.RespondentId))
                                respondents.Add(r.RespondentId);
                        }
                    }
                    if (dt.Operation == OPMODEL.NULL)
                        return respondents;
                    // With an operator set, evaluation continues with the children
                    // below and the list computed here is not used (as before).
                }
                Console.WriteLine(String.Format("{0} {1} {2}", dt.LChild == null ? "null":dt.LChild.toString(dt.LChild), dt.Operation, dt.RChild == null ? "null": dt.RChild.toString(dt.RChild)));
                switch (dt.Operation)
                {
                    case OPMODEL.AND:
                        {
                            var lResp = Evaluate(dt.LChild);
                            if (dt.RChild == null)
                                return lResp;
                            // Intersect already yields distinct elements.
                            return lResp.Intersect(Evaluate(dt.RChild)).ToList();
                        }
                    case OPMODEL.OR:
                        {
                            var lResp = Evaluate(dt.LChild);
                            if (dt.RChild == null)
                                return lResp;
                            return lResp.Union(Evaluate(dt.RChild)).ToList();
                        }
                    case OPMODEL.NOT:
                    case OPMODEL.ORNOT:
                        {
                            // NOTE(review): ORNOT is evaluated exactly like NOT (left
                            // except right), as in the previous code - confirm that
                            // this is the intended meaning of ORNOT.
                            var lResp = Evaluate(dt.LChild);
                            if (dt.RChild == null)
                                return lResp;
                            return lResp.Except(Evaluate(dt.RChild)).ToList();
                        }
                }
                return new List<long>();
            }
        }
}
}
 
                        
OK - My original code was a hack and I forced myself to write a much more generic set expression evaluator. I still input an array of 'Expression' objects but very much less specific to my original attempt. Here is the result and how it gets used:
The Test Code:
}