How do I access individual nodes in the dependency tree and constituency tree returned by the Stanford Parser?

696 views Asked by At
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

using java.io;
using edu.stanford.nlp.process;
using edu.stanford.nlp.ling;
using edu.stanford.nlp.trees;
using edu.stanford.nlp.parser.lexparser;
using Console = System.Console;

namespace Parser
{   

    /// <summary>
    /// Console sample that parses one sentence with the Stanford lexicalized parser
    /// and prints its constituency tree and its typed dependencies.
    /// </summary>
    class Parser
    {
        // Path to models extracted from `stanford-parser-3.5.2-models.jar`
        private const string JarRoot = @"E:\Project\stanford-parser-full-2015-04-20\stanford-parser-3.5.2-models";

        // Directory (inside the extracted jar) that holds the model files
        private const string ModelsDirectory = JarRoot + @"\edu\stanford\nlp\models";

        // English PCFG model loaded by LoadLexicalizedParser
        private const string EnglishPcfgModel = ModelsDirectory + @"\lexparser\englishPCFG.ser.gz";

        // Characters that separate words in the input sentence
        private static readonly char[] TokenSeparators = { ' ', '\t', '\r', '\n' };

        /// <summary>
        /// Loads the lexicalized parser from the English PCFG model file.
        /// </summary>
        /// <returns>The parser returned by <c>LexicalizedParser.loadModel</c>.</returns>
        private static LexicalizedParser LoadLexicalizedParser()
        {
            return LexicalizedParser.loadModel(EnglishPcfgModel);
        }

        /// <summary>
        /// Parses <paramref name="sentence"/> and returns the resulting tree.
        /// </summary>
        /// <param name="lp">Parser to apply to the words of the sentence.</param>
        /// <param name="sentence">Sentence whose words are separated by whitespace.</param>
        /// <returns>The tree produced by <c>lp.apply</c> for the words of the sentence.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="lp"/> or <paramref name="sentence"/> is null.
        /// </exception>
        private static Tree GetLexicalTree(LexicalizedParser lp, string sentence)
        {
            if (lp == null)
            {
                throw new ArgumentNullException("lp");
            }

            if (sentence == null)
            {
                throw new ArgumentNullException("sentence");
            }

            // Split on whitespace only and drop empty entries, so that leading, trailing
            // or repeated separators do not produce empty-string tokens.
            // NOTE: punctuation attached to a word (e.g. "sentence.") stays part of that
            // token; callers that need punctuation as separate tokens must space it out.
            string[] words = sentence.Split(TokenSeparators, StringSplitOptions.RemoveEmptyEntries);

            var rawWords = Sentence.toCoreLabelList(words);
            return lp.apply(rawWords);
        }

        /// <summary>
        /// Gets the constituency tree from the lexical <paramref name="tree"/> as a string.
        /// </summary>
        /// <param name="tree">Parse tree to render.</param>
        /// <returns>The value of <c>tree.pennString()</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="tree"/> is null.</exception>
        private static string GetConstituencyTree(Tree tree)
        {
            if (tree == null)
            {
                throw new ArgumentNullException("tree");
            }

            return tree.pennString();
        }

        /// <summary>
        /// Gets the dependency tree from the lexical <paramref name="tree"/> as a string.
        /// </summary>
        /// <param name="tree">Parse tree to extract dependencies from.</param>
        /// <returns>
        /// The entries returned by <c>typedDependenciesCCprocessed()</c>, one per line,
        /// each followed by "\n"; an empty string when there are no entries.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="tree"/> is null.</exception>
        private static string GetDependencyTree(Tree tree)
        {
            if (tree == null)
            {
                throw new ArgumentNullException("tree");
            }

            // Extract dependencies from lexical tree
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs = gsf.newGrammaticalStructure(tree);
            var tdl = gs.typedDependenciesCCprocessed();

            // StringBuilder avoids re-copying the whole result on every iteration,
            // which repeated string concatenation would do.
            var dependencyTree = new StringBuilder();
            int count = tdl.size();

            for (int i = 0; i < count; ++i)
            {
                dependencyTree.Append(tdl.get(i)).Append('\n');
            }

            return dependencyTree.ToString();
        }

        /// <summary>
        /// Entry point: parses a fixed sample sentence and writes both tree
        /// representations to the console.
        /// </summary>
        static void Main()
        {
            var lp = LoadLexicalizedParser();
            string sentence = "This is an easy sentence.";

            Tree tree = GetLexicalTree(lp, sentence);

            string constituencyTree = GetConstituencyTree(tree);
            string dependencyTree = GetDependencyTree(tree);

            Console.WriteLine("Constituency Tree\n" + constituencyTree);
            Console.WriteLine("Dependency Tree\n" + dependencyTree);

            //// Extract collapsed dependencies from parsed tree
            //var tp = new TreePrint("penn,typedDependenciesCollapsed");
            //tp.printTree(tree);
        }
    }
}

In this code, I am getting the constituency tree and the dependency tree as strings. But I want to work with them through the 'Tree' type itself, i.e., I want to access and manipulate the nodes of the variable 'tree'. Is there any way I can do that? Or do I have to create my own tree data structure and get the individual nodes by processing the strings ('constituencyTree' and 'dependencyTree')?

[I need this for a small project that I am doing currently.]

1

There is 1 answer

3
Sebastian Schuster On BEST ANSWER

Yes, there exist plenty of data structures to work with constituency trees and dependency trees. For constituency trees, you want to work with the Tree data structure which has many useful built-in functions to traverse trees, get all the terminal nodes, etc.

For dependency trees you can either work with a list of TypedDependency objects where each TypedDependency represents the relation between a governor word and a dependent word, or you can work with a SemanticGraph. To convert the list of TypedDependency which you named tdl in your example to a SemanticGraph, just pass the list to the constructor:

SemanticGraph sg = new SemanticGraph(tdl);