using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using java.io;
using edu.stanford.nlp.process;
using edu.stanford.nlp.ling;
using edu.stanford.nlp.trees;
using edu.stanford.nlp.parser.lexparser;
using Console = System.Console;
namespace Parser
{
/// <summary>
/// Console sample that parses one English sentence with the Stanford
/// lexicalized parser and prints its constituency tree and its typed
/// dependencies.
/// </summary>
class Parser
{
    // Path to models extracted from `stanford-parser-3.5.2-models.jar`
    private const string JarRoot = @"E:\Project\stanford-parser-full-2015-04-20\stanford-parser-3.5.2-models";

    // Location of the serialized English PCFG grammar, relative to JarRoot.
    private const string EnglishPcfgRelativePath = @"\edu\stanford\nlp\models" + @"\lexparser\englishPCFG.ser.gz";

    /// <summary>
    /// Loads the English PCFG lexicalized parser from the model file on disk.
    /// </summary>
    /// <returns>The parser returned by <c>LexicalizedParser.loadModel</c>.</returns>
    private static LexicalizedParser LoadLexicalizedParser()
    {
        return LexicalizedParser.loadModel(JarRoot + EnglishPcfgRelativePath);
    }

    /// <summary>
    /// Parses <paramref name="sentence"/> and returns its parse tree.
    /// </summary>
    /// <param name="lp">Parser used to build the tree.</param>
    /// <param name="sentence">Raw, untokenized sentence text.</param>
    /// <returns>The tree produced by <c>lp.apply</c> for the tokenized sentence.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="lp"/> or <paramref name="sentence"/> is null.
    /// </exception>
    private static Tree GetLexicalTree(LexicalizedParser lp, string sentence)
    {
        if (lp == null)
        {
            throw new ArgumentNullException("lp");
        }
        if (sentence == null)
        {
            throw new ArgumentNullException("sentence");
        }

        // Splitting on ' ' leaves punctuation attached to the neighbouring word
        // ("sentence.") and yields empty tokens for repeated spaces, so the raw
        // text is run through the Penn Treebank tokenizer instead.
        var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
        var reader = new StringReader(sentence);
        try
        {
            var tokens = tokenizerFactory.getTokenizer(reader).tokenize();
            return lp.apply(tokens);
        }
        finally
        {
            reader.close();
        }
    }

    /// <summary>
    /// Renders the constituency structure of <paramref name="tree"/> as text.
    /// </summary>
    /// <param name="tree">Parse tree to render.</param>
    /// <returns>The string returned by <c>tree.pennString()</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tree"/> is null.</exception>
    private static string GetConstituencyTree(Tree tree)
    {
        if (tree == null)
        {
            throw new ArgumentNullException("tree");
        }

        return tree.pennString();
    }

    /// <summary>
    /// Extracts the CC-processed typed dependencies of <paramref name="tree"/>
    /// and renders them as text, one dependency per line.
    /// </summary>
    /// <param name="tree">Parse tree to extract dependencies from.</param>
    /// <returns>
    /// Each dependency's string form followed by "\n"; an empty string when
    /// there are no dependencies.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="tree"/> is null.</exception>
    private static string GetDependencyTree(Tree tree)
    {
        if (tree == null)
        {
            throw new ArgumentNullException("tree");
        }

        // Extract dependencies from lexical tree
        var languagePack = new PennTreebankLanguagePack();
        var structureFactory = languagePack.grammaticalStructureFactory();
        var structure = structureFactory.newGrammaticalStructure(tree);
        var dependencies = structure.typedDependenciesCCprocessed();

        // StringBuilder avoids re-copying the accumulated text on every iteration.
        var text = new StringBuilder();
        for (int i = 0; i < dependencies.size(); ++i)
        {
            text.Append(dependencies.get(i)).Append("\n");
        }
        return text.ToString();
    }

    /// <summary>
    /// Entry point: parses a fixed sample sentence and writes both tree
    /// renderings to the console.
    /// </summary>
    static void Main()
    {
        var lp = LoadLexicalizedParser();
        string sentence = "This is an easy sentence.";
        Tree tree = GetLexicalTree(lp, sentence);
        string constituencyTree = GetConstituencyTree(tree);
        string dependencyTree = GetDependencyTree(tree);
        Console.WriteLine("Constituency Tree\n" + constituencyTree);
        Console.WriteLine("Dependency Tree\n" + dependencyTree);
        // Alternative: new TreePrint("penn,typedDependenciesCollapsed").printTree(tree)
        // prints the tree and its collapsed dependencies directly.
    }
}
}
In this code, I am getting the constituency tree and the dependency tree as strings. However, I want to work with them through the `Tree` type itself, i.e. I want to access and manipulate the nodes of the variable `tree`. Is there any way I can do that? Or do I have to create my own tree data structure and recover the individual nodes by parsing the strings (`constituencyTree` and `dependencyTree`)?
[I need this for a small project that I am doing currently.]
Yes, there are plenty of data structures for working with constituency trees and dependency trees. For constituency trees, you want to work with the `Tree` data structure, which has many useful built-in functions to traverse trees, get all the terminal nodes, etc. For dependency trees, you can either work with a list of `TypedDependency` objects, where each `TypedDependency` represents the relation between a governor word and a dependent word, or you can work with a `SemanticGraph`. To convert the list of `TypedDependency` objects, which you named `tdl` in your example, to a `SemanticGraph`, just pass the list to the constructor: