using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using java.io;
using edu.stanford.nlp.process;
using edu.stanford.nlp.ling;
using edu.stanford.nlp.trees;
using edu.stanford.nlp.parser.lexparser;
using Console = System.Console;
namespace Parser
{
    /// <summary>
    /// Console sample that loads the English PCFG model of the Stanford parser,
    /// parses one sentence, and prints its constituency and dependency trees.
    /// </summary>
    class Parser
    {
        /// <summary>
        /// Loads the lexicalized parser from the English PCFG model file.
        /// </summary>
        /// <returns>The parser returned by <c>LexicalizedParser.loadModel</c>.</returns>
        private static LexicalizedParser LoadLexicalizedParser()
        {
            // Path to models extracted from `stanford-parser-3.5.2-models.jar`
            var jarRoot = @"E:\Project\stanford-parser-full-2015-04-20\stanford-parser-3.5.2-models";
            var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

            // Loading english PCFG parser from file
            return LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");
        }

        /// <summary>
        /// Gets the lexical tree for a sentence whose words are separated by spaces.
        /// </summary>
        /// <param name="lp">Parser used to build the tree.</param>
        /// <param name="sentence">Sentence text; words are split on the space character.</param>
        /// <returns>The tree produced by applying <paramref name="lp"/> to the words.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="lp"/> or <paramref name="sentence"/> is null.
        /// </exception>
        private static Tree GetLexicalTree(LexicalizedParser lp, string sentence)
        {
            if (lp == null)
            {
                throw new ArgumentNullException("lp");
            }

            if (sentence == null)
            {
                throw new ArgumentNullException("sentence");
            }

            // Drop empty entries so that leading, trailing or repeated spaces do not
            // reach the parser as empty-string "words".
            // NOTE(review): punctuation stays attached to its neighbouring word
            // (e.g. "sentence."); a real tokenizer may be preferable for raw text.
            string[] words = sentence.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            var rawWords = Sentence.toCoreLabelList(words);
            return lp.apply(rawWords);
        }

        /// <summary>
        /// Gets the constituency tree from the lexical tree as a string.
        /// </summary>
        /// <param name="tree">Lexical tree to render.</param>
        /// <returns>The result of <c>tree.pennString()</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="tree"/> is null.</exception>
        private static string GetConstituencyTree(Tree tree)
        {
            if (tree == null)
            {
                throw new ArgumentNullException("tree");
            }

            return tree.pennString();
        }

        /// <summary>
        /// Gets the dependency tree from the lexical tree as a string, one typed
        /// dependency per line (each line terminated by "\n").
        /// </summary>
        /// <param name="tree">Lexical tree to extract dependencies from.</param>
        /// <returns>
        /// The CC-processed typed dependencies, each followed by a newline; an empty
        /// string when there are none.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="tree"/> is null.</exception>
        private static string GetDependencyTree(Tree tree)
        {
            if (tree == null)
            {
                throw new ArgumentNullException("tree");
            }

            // Extract dependencies from lexical tree
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs = gsf.newGrammaticalStructure(tree);
            var tdl = gs.typedDependenciesCCprocessed();

            // StringBuilder avoids re-copying the accumulated text on every iteration.
            var dependencyTree = new StringBuilder();
            int count = tdl.size();
            for (int i = 0; i < count; ++i)
            {
                dependencyTree.Append(tdl.get(i)).Append("\n");
            }

            return dependencyTree.ToString();
        }

        /// <summary>
        /// Entry point: parses a fixed sample sentence and writes both tree
        /// representations to the console.
        /// </summary>
        static void Main()
        {
            var lp = LoadLexicalizedParser();
            string sentence = "This is an easy sentence.";

            Tree tree = GetLexicalTree(lp, sentence);
            string constituencyTree = GetConstituencyTree(tree);
            string dependencyTree = GetDependencyTree(tree);

            Console.WriteLine("Constituency Tree\n" + constituencyTree);
            Console.WriteLine("Dependency Tree\n" + dependencyTree);

            //// Extract collapsed dependencies from parsed tree
            //var tp = new TreePrint("penn,typedDependenciesCollapsed");
            //tp.printTree(tree);
        }
    }
}
In this code, I am getting the constituency tree and dependency tree as strings. But I want to use them through the 'Tree' type itself, i.e., I want to access and manipulate the nodes of the variable 'tree'. Is there any way I can do that? Or do I have to create my own tree data structure and get the individual nodes by processing the strings ('constituencyTree' & 'dependencyTree')?
[I need this for a small project that I am doing currently.]
Yes, there exist plenty of data structures to work with constituency trees and dependency trees. For constituency trees, you want to work with the
`Tree` data structure, which has many useful built-in functions to traverse trees, get all the terminal nodes, etc. For dependency trees you can either work with a list of
`TypedDependency` objects, where each `TypedDependency` represents the relation between a governor word and a dependent word, or you can work with a `SemanticGraph`. To convert the list of `TypedDependency` which you named `tdl` in your example to a `SemanticGraph`, just pass the list to the constructor: