I need to evaluate ad-hoc queries in C# where the value nodes of an expression tree are sets of numbers.
A sample expression: SET-A NOT SET-B AND (SET-C OR SET-D)
AND = INTERSECT
OR = UNION
NOT = EXCEPT
The expressions can get quite complex - my data sets are drawn from the respondents who answer surveys with many questions. I want to end up with the set of respondents who answered specific questions in specific ways.
I've tried building a tree style evaluator but while it works in most cases it fails in others (mostly if I put a NOT in different places).
Is there anyone who has done this before and come up with an elegant solution they would like to share? Preferably in C# - obviously I use LINQ to do the set operations, I need a way to build and evaluate trees that combine multiple sets in different ways.
Here is my current code (added in response to comment)
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Z.Expressions;
using DIPEF = DataImport.EF;
namespace DataImport.Models
{
/// <summary>
/// Operator attached to a <see cref="DemographicOption"/> or a <see cref="DemographicTree"/> node.
/// </summary>
public enum OPMODEL
{
    /// <summary>No operator; the default value for options and tree nodes.</summary>
    NULL = 0,

    /// <summary>Combined with <c>Intersect</c> by <c>DemographicTree.Evaluate</c>.</summary>
    AND = 1,

    /// <summary>Combined with <c>Union</c> by <c>DemographicTree.Evaluate</c>.</summary>
    OR = 2,

    /// <summary>Combined with <c>Except</c> (left minus right) by <c>DemographicTree.Evaluate</c>.</summary>
    NOT = 3,

    /// <summary>Also combined with <c>Except</c> by <c>DemographicTree.Evaluate</c>; printed as "OR !" by <c>toString</c>.</summary>
    ORNOT = 4
}
/// <summary>
/// One element of the flat expression list that the <c>DemographicTree</c> list constructor consumes.
/// </summary>
public class DemographicOption
{
    /// <summary>Option identifier copied onto the tree node built from this element.</summary>
    public long OptionId { get; set; }

    /// <summary>Question identifier copied onto the tree node; starts at 0.</summary>
    public long QuestionId { get; set; }

    /// <summary>Question text; not read by the tree code in this file.</summary>
    public string QuestionText { get; set; }

    /// <summary>Option text copied onto the tree node.</summary>
    public string OptionText { get; set; }

    /// <summary>Operator carried by this element; starts as <see cref="OPMODEL.NULL"/>.</summary>
    public OPMODEL Operation { get; set; }

    /// <summary>True when this element opens a parenthesised group.</summary>
    public bool LParen { get; set; }

    /// <summary>True when this element closes a parenthesised group.</summary>
    public bool RParen { get; set; }

    /// <summary>Free-form logic text copied onto the tree node and interpreted during evaluation.</summary>
    public string Logic { get; set; }

    /// <summary>Creates an element with no question and no operator.</summary>
    public DemographicOption()
    {
        QuestionId = 0;
        Operation = OPMODEL.NULL;
    }
}
public class DemographicTree
{
// Project identifier carried by the tree. NOTE(review): nothing in this class assigns it — confirm which caller sets it.
public int projectId { get; set; }
// Operator stored on this node; OPMODEL.NULL when the node carries no operator.
public OPMODEL Operation; //root node
// Option identifier of a value node (0 when unset).
public long OptionId;
// Question identifier of a value node (0 when unset).
public long QuestionId;
// Logic text of a value node; interpreted by Evaluate when QuestionId > 0.
public string Logic;
// Option text copied from the source DemographicOption.
public string OptionText;
// Left subtree, or null.
public DemographicTree LChild; // Complex Node
// Right subtree, or null.
public DemographicTree RChild; // Complex Node
// Operators pushed while parsing (list constructor and LChildTree); popped when the right subtree is built.
public Stack<OPMODEL> opStack = new Stack<OPMODEL>();
// Set by the list constructor when the last option has RParen set and carries OPMODEL.NOT.
public bool Not { get; set; }
/// <summary>
/// Creates an empty node: no operator, no children, and the <c>Not</c> flag cleared.
/// </summary>
public DemographicTree()
{
    Not = false;
    LChild = RChild = null;
    Operation = OPMODEL.NULL;
}
/// <summary>
/// Builds the subtree for a group that starts at the head of <paramref name="list"/>.
/// The head element is removed from the list and becomes the left leaf; any remaining
/// elements are handed to the list constructor to form the right child.
/// </summary>
/// <param name="list">Remaining expression elements; mutated in place.</param>
/// <returns>The new subtree, or null when the list is null or empty.</returns>
private DemographicTree LChildTree(ref List<DemographicOption> list)
{
    if (list == null || list.Count == 0)
    {
        return null;
    }

    DemographicOption head = list[0];
    var subtree = new DemographicTree
    {
        Operation = head.Operation,
        LChild = new DemographicTree
        {
            QuestionId = head.QuestionId,
            Logic = head.Logic,
            OptionId = head.OptionId,
            OptionText = head.OptionText
        }
    };
    list.RemoveAt(0);

    if (list.Count == 0)
    {
        return subtree;
    }

    // The operator of the next element goes onto THIS instance's stack before the rest is parsed.
    opStack.Push(list[0].Operation);
    subtree.RChild = new DemographicTree(list);
    return subtree;
}
/// <summary>
/// Builds a tree node from a flat list of expression elements.
/// The list is consumed destructively: elements are removed from the caller's list as they are parsed.
/// A null or empty list leaves the node with its field defaults.
/// </summary>
/// <param name="list">Expression elements in order; mutated in place.</param>
public DemographicTree(List<DemographicOption> list)
{
if (list == null || list.Count == 0)
return;
DemographicOption o = list[0];
DemographicOption ultima = list.Last();
// Flag the node when the final element closes a parenthesis and carries NOT.
if (ultima.RParen == true && ultima.Operation == OPMODEL.NOT)
Not = true;
Operation = o.Operation;
// First element closes a group: this node becomes a value node that keeps the element's operator.
if (o.RParen == true)
{
OptionId = o.OptionId;
QuestionId = o.QuestionId;
Logic = o.Logic;
OptionText = o.OptionText;
opStack.Push(o.Operation);
list.RemoveAt(0);
return;
}
// A single element without an operator: plain value node.
if (o.Operation == OPMODEL.NULL && list.Count == 1)
{
OptionId = o.OptionId;
QuestionId = o.QuestionId;
Logic = o.Logic;
OptionText = o.OptionText;
list.RemoveAt(0);
return;
}
// Operation = o.Operation;
opStack.Push(Operation);
if (o.LParen == true)
{
// LChildTree runs on this instance, so anything it pushes lands on this node's opStack.
LChild = LChildTree(ref list);
if (list == null || list.Count == 0)
return;
}
else
{
// No opening parenthesis: the first element becomes the left value node.
LChild = new DemographicTree();
LChild.OptionId = o.OptionId;
LChild.QuestionId = o.QuestionId;
LChild.Logic = o.Logic;
LChild.OptionText = o.OptionText;
list.RemoveAt(0);
}
if (list.Count == 1 )
{
// NOTE(review): `o` is still the first element here, and a first element with RParen set
// already returned above, so this assignment looks unreachable — confirm.
if (o.RParen == true)
LChild.Operation = o.Operation;
// The one remaining element becomes the right value node; it is NOT removed from the list.
o = list[0];
RChild = new DemographicTree();
RChild.OptionId = o.OptionId;
RChild.QuestionId = o.QuestionId;
RChild.Logic = o.Logic;
RChild.OptionText = o.OptionText;
// Operation = opStack.Pop();
}
if (list.Count > 1)
{
// Take the most recently pushed operator as this node's operator, then parse the rest recursively.
Operation = opStack.Pop();
RChild = new DemographicTree(list);
}
}
/// <summary>
/// Evaluates a boolean expression written in the <c>DataColumn.Expression</c> syntax
/// (for example <c>"3 &gt; 2"</c>) by attaching it to a computed column of a scratch table.
/// </summary>
/// <param name="logicalExpression">Expression text to evaluate.</param>
/// <returns>
/// The boolean value of the expression. A null or empty expression produces no value
/// (the cell is <c>DBNull</c>) and is reported as <c>false</c> instead of failing the cast.
/// </returns>
/// <remarks>Malformed expressions still throw whatever the expression parser throws.</remarks>
public static bool EvaluateLogicalExpression(string logicalExpression)
{
    // The scratch table is disposable; release it deterministically.
    using (System.Data.DataTable table = new System.Data.DataTable())
    {
        table.Columns.Add("", typeof(bool));
        table.Columns[0].Expression = logicalExpression;

        System.Data.DataRow row = table.NewRow();
        table.Rows.Add(row);

        // With no expression attached the cell stays DBNull, which cannot be unboxed as bool.
        object value = row[0];
        return value is bool && (bool)value;
    }
}
/// <summary>
/// Returns the first positive <c>OptionId</c> found in the tree, looking at the node itself,
/// then its right subtree, then its left subtree.
/// </summary>
/// <param name="dt">Root of the (sub)tree to search; may be null.</param>
/// <returns>The option id found, or 0 when the tree is null or holds no option id.</returns>
public long findFirstOptionId(DemographicTree dt)
{
    if (dt == null)
    {
        return 0;
    }

    if (dt.OptionId > 0)
    {
        return dt.OptionId;
    }

    // Right subtree keeps priority, but a right subtree without any option id
    // must not hide an option id that lives in the left subtree.
    long fromRight = findFirstOptionId(dt.RChild);
    return fromRight > 0 ? fromRight : findFirstOptionId(dt.LChild);
}
/// <summary>
/// Renders a tree as text for diagnostics, resolving option and question names from the database.
/// </summary>
/// <param name="dt">Root of the (sub)tree to render; null yields an empty string.</param>
/// <returns>The rendered text.</returns>
public String toString(DemographicTree dt)
{
    if (dt == null)
    {
        return "";
    }

    // One context serves the whole traversal instead of one per visited node.
    using (var entities = new DIPEF.DataImportDB())
    {
        return DescribeNode(dt, entities);
    }
}

/// <summary>Renders one node and, recursively, its children using an already open context.</summary>
private static string DescribeNode(DemographicTree dt, DIPEF.DataImportDB entities)
{
    string s = "";
    if (dt == null)
    {
        return s;
    }

    bool hasLogic = !String.IsNullOrEmpty(dt.Logic);

    if (dt.Operation == OPMODEL.NULL && dt.QuestionId == 0)
    {
        if (dt.OptionId <= 0)
        {
            return s;
        }

        var option = entities.Options.FirstOrDefault(O => O.OptionId == dt.OptionId);
        // A missing row falls back to the raw id rather than dereferencing null.
        return option != null ? option.Name : dt.OptionId.ToString();
    }

    if (dt.Operation == OPMODEL.NULL && dt.QuestionId > 0 && !hasLogic)
    {
        var question = entities.Questions.FirstOrDefault(Q => Q.QuestionId == dt.QuestionId);
        return question != null ? question.Name : dt.QuestionId.ToString();
    }

    if (dt.QuestionId > 0 && hasLogic)
    {
        var q = entities.Questions.FirstOrDefault(Q => Q.QuestionId == dt.QuestionId);
        s += string.Format(" {0} {1}", q != null ? q.Name : dt.QuestionId.ToString(), dt.Logic);
        if (dt.Operation == OPMODEL.NULL)
        {
            return s;
        }
    }

    // Text placed in front of the left child; the operators differ only in this prefix.
    string prefix;
    switch (dt.Operation)
    {
        case OPMODEL.AND:
        case OPMODEL.OR:
            prefix = " ";
            break;
        case OPMODEL.NOT:
            prefix = " !";
            break;
        case OPMODEL.ORNOT:
            prefix = " OR !";
            break;
        default:
            return s;
    }

    s += prefix + DescribeNode(dt.LChild, entities);
    if (dt.RChild != null)
    {
        s += " " + DescribeNode(dt.RChild, entities);
    }
    return s;
}
/// <summary>
/// Evaluates the tree and optionally complements the result.
/// The complement is taken against every respondent who answered the question that owns
/// the first option id found in the tree; when no such option exists the result is returned unchanged.
/// </summary>
/// <param name="dt">Tree to evaluate.</param>
/// <param name="NOT">True to return the complement instead of the matching set.</param>
/// <returns>Respondent ids.</returns>
public List<long> Evaluate(DemographicTree dt, bool NOT)
{
    List<long> matched = Evaluate(dt);
    if (!NOT)
    {
        return matched;
    }

    long firstOptionId = findFirstOptionId(dt);
    using (var db = new DIPEF.DataImportDB())
    {
        var option = db.Options.FirstOrDefault(O => O.OptionId == firstOptionId);
        if (option == null)
        {
            return matched;
        }

        var universe = db.Responses.Where(R => R.QuestionId == option.QuestionId).Select(r => r.RespondentId).Distinct().ToList();
        return universe.Except(matched).ToList();
    }
}
/// <summary>
/// Evaluates a tree to the list of respondent ids it selects.
/// Value nodes are resolved against the database; operator nodes combine their children with
/// Intersect (AND), Union (OR) or Except (NOT and ORNOT). Progress is written to the console.
/// </summary>
/// <param name="dt">Root of the (sub)tree to evaluate.</param>
/// <returns>Respondent ids; an empty list when the node matches no known shape.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dt"/> is null.</exception>
public List<long> Evaluate(DemographicTree dt)
{
    if (dt == null)
    {
        throw new ArgumentNullException("dt");
    }

    Console.WriteLine("{0}", dt.toString(dt));

    using (var entities = new DIPEF.DataImportDB())
    {
        bool hasLogic = !String.IsNullOrEmpty(dt.Logic);
        bool hasChildren = dt.LChild != null || dt.RChild != null;

        // The parser can leave an operator on a childless node (an element that closes a parenthesis).
        // Such a node has nothing to combine, so it is resolved as a value node instead of
        // recursing into a null child.
        bool isValueNode = dt.Operation == OPMODEL.NULL || !hasChildren;

        if (isValueNode)
        {
            if (dt.QuestionId == 0)
            {
                return entities.Responses.Where(R => R.OptionId == dt.OptionId).Select(r => r.RespondentId).Distinct().ToList();
            }

            if (dt.QuestionId > 0 && dt.OptionId == 0 && !hasLogic)
            {
                return entities.Responses.Where(R => R.QuestionId == dt.QuestionId).Select(r => r.RespondentId).Distinct().ToList();
            }

            if (dt.QuestionId > 0 && hasLogic)
            {
                return EvaluateLogicNode(dt, entities);
            }
        }

        Console.WriteLine(String.Format("{0} {1} {2}", dt.LChild == null ? "null" : dt.LChild.toString(dt.LChild), dt.Operation, dt.RChild == null ? "null" : dt.RChild.toString(dt.RChild)));

        if (!hasChildren)
        {
            // A value node whose shape matched none of the cases above selects nobody.
            return new List<long>();
        }

        switch (dt.Operation)
        {
            case OPMODEL.AND:
                return CombineChildren(dt, (left, right) => left.Intersect(right));
            case OPMODEL.OR:
                return CombineChildren(dt, (left, right) => left.Union(right));
            case OPMODEL.NOT:
            case OPMODEL.ORNOT:
                // NOTE(review): ORNOT is evaluated exactly like NOT (left minus right). A true
                // "OR NOT" would need the complement of the right side — confirm the intent.
                return CombineChildren(dt, (left, right) => left.Except(right));
            default:
                return new List<long>();
        }
    }
}

/// <summary>Evaluates both children and merges them; a missing right child yields the left result as is.</summary>
private List<long> CombineChildren(DemographicTree dt, Func<List<long>, List<long>, IEnumerable<long>> combine)
{
    List<long> left = Evaluate(dt.LChild);
    if (dt.RChild == null)
    {
        return left;
    }

    return combine(left, Evaluate(dt.RChild)).ToList();
}

/// <summary>Resolves a value node that filters the responses to one question by its Logic text.</summary>
private List<long> EvaluateLogicNode(DemographicTree dt, DIPEF.DataImportDB entities)
{
    var responses = entities.Responses.Where(R => R.QuestionId == dt.QuestionId).ToList();
    string logic = dt.Logic.Trim();

    if (string.Equals(logic, "N/A", StringComparison.OrdinalIgnoreCase) ||
        string.Equals(logic, "is null", StringComparison.OrdinalIgnoreCase))
    {
        // Everyone in the project who did NOT answer the question.
        // With no responses at all there is no row to read the project from, so fall back to
        // the id carried by the node. NOTE(review): assumes dt.projectId names the same project — confirm.
        int projectId = responses.Count > 0 ? responses[0].ProjectId : dt.projectId;
        List<long> allRespondents = entities.Respondents.Where(R => R.ProjectId == projectId).Select(A => A.RespondentId).ToList();
        List<long> qRespondents = responses.Select(B => B.RespondentId).ToList();
        return allRespondents.Except(qRespondents).ToList();
    }

    // Purely alphabetic logic is a substring match on the option name; anything else is
    // appended to the option value and evaluated as an expression.
    bool matchByName = Regex.IsMatch(dt.Logic, @"^[a-zA-Z]+$");

    var matched = new List<long>();
    var seen = new HashSet<long>();
    foreach (DIPEF.Response r in responses)
    {
        if (seen.Contains(r.RespondentId))
        {
            continue; // already selected; no need to look the option up again
        }

        var o = entities.Options.FirstOrDefault(O => O.OptionId == r.OptionId);
        if (o == null)
        {
            continue; // response points at an option that no longer exists
        }

        bool isMatch = matchByName
            ? o.Name != null && o.Name.IndexOf(dt.Logic, StringComparison.OrdinalIgnoreCase) >= 0
            : EvaluateLogicalExpression(string.Format("{0} {1}", o.Value, dt.Logic));

        if (isMatch && seen.Add(r.RespondentId))
        {
            matched.Add(r.RespondentId);
        }
    }

    return matched;
}
}
}
OK - My original code was a hack and I forced myself to write a much more generic set expression evaluator. I still input an array of 'Expression' objects but very much less specific to my original attempt. Here is the result and how it gets used:
The Test Code:
}