Получение кореференций с помощью стандартного пакета corenlp

Я пытаюсь получить кореференции в тексте. Я новичок в работе с пакетом corenlp. Я попробовал приведённый ниже код, но он не работает; я открыт и для других методов.

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */

package corenlp;
import edu.stanford.nlp.ling.CoreAnnotations.CollapsedCCProcessedDependenciesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.CorefGraphAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TreeAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.semgraph.SemanticGraph;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.IntTuple;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Timing;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import java.util.Properties;
/**
 *
 * @author Karthi
 */
public class Main {


        // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
    Properties props = new Properties();
    FileInputStream in = new FileInputStream("Main.properties");

    props.load(in);
    in.close();
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // read some text in the text variable
    String text = "The doctor can consult with other doctors about this patient. If that is the case, the name of the doctor and the names of the consultants have to be maintained. Otherwise, only the name of the doctor is kept. "; // Add your text here!

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);
    System.out.println(document);
    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = (List<CoreMap>) document.get(SentencesAnnotation.class);
    System.out.println(sentences);
    for(CoreMap sentence: sentences) {
      // traversing the words in the current sentence
      // a CoreLabel is a CoreMap with additional token-specific methods
      for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
        // this is the text of the token
        String word = token.get(TextAnnotation.class);
        // this is the POS tag of the token
        String pos = token.get(PartOfSpeechAnnotation.class);
        // this is the NER label of the token
        String ne = token.get(NamedEntityTagAnnotation.class);
      }

      // this is the parse tree of the current sentence
      Tree tree = sentence.get(TreeAnnotation.class);
System.out.println(tree);
      // this is the Stanford dependency graph of the current sentence
      SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
      System.out.println(dependencies);
    }

    // this is the coreference link graph
    // each link stores an arc in the graph; the first element in the Pair is the source, the second is the target
    // each node is stored as <sentence id, token id>. Both offsets start at 1!
    List<Pair<IntTuple, IntTuple>> graph = document.get(CorefGraphAnnotation.class);
    System.out.println(graph);

    }

}

Это ошибка, которую я получаю:

Loading POS Model [// For POS model] ... Loading default properties from trained tagger // For POS model
Error: No such trained tagger config file found.
java.io.FileNotFoundException: \\ For POS model (The specified path is invalid)
        at java.io.FileInputStream.open(Native Method)
        at java.io.FileInputStream.<init>(FileInputStream.java:106)
        at java.io.FileInputStream.<init>(FileInputStream.java:66)
        at edu.stanford.nlp.tagger.maxent.TaggerConfig.getTaggerDataInputStream(TaggerConfig.java:741)
        at edu.stanford.nlp.tagger.maxent.TaggerConfig.<init>(TaggerConfig.java:178)
        at edu.stanford.nlp.tagger.maxent.MaxentTagger.<init>(MaxentTagger.java:228)
        at edu.stanford.nlp.pipeline.POSTaggerAnnotator.loadModel(POSTaggerAnnotator.java:57)
        at edu.stanford.nlp.pipeline.POSTaggerAnnotator.<init>(POSTaggerAnnotator.java:44)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP$4.create(StanfordCoreNLP.java:441)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP$4.create(StanfordCoreNLP.java:434)
        at edu.stanford.nlp.pipeline.AnnotatorPool.get(AnnotatorPool.java:62)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.construct(StanfordCoreNLP.java:309)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.<init>(StanfordCoreNLP.java:347)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.<init>(StanfordCoreNLP.java:337)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.<init>(StanfordCoreNLP.java:329)
        at corenlp.Main.main(Main.java:66)
Exception in thread "main" java.lang.RuntimeException: java.io.FileNotFoundException: \\ For POS model (The specified path is invalid)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP$4.create(StanfordCoreNLP.java:443)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP$4.create(StanfordCoreNLP.java:434)
        at edu.stanford.nlp.pipeline.AnnotatorPool.get(AnnotatorPool.java:62)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.construct(StanfordCoreNLP.java:309)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.<init>(StanfordCoreNLP.java:347)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.<init>(StanfordCoreNLP.java:337)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.<init>(StanfordCoreNLP.java:329)
        at corenlp.Main.main(Main.java:66)
Caused by: java.io.FileNotFoundException: \\ For POS model (The specified path is invalid)
        at java.io.FileInputStream.open(Native Method)
        at java.io.FileInputStream.<init>(FileInputStream.java:106)
        at java.io.FileInputStream.<init>(FileInputStream.java:66)
        at edu.stanford.nlp.tagger.maxent.TaggerConfig.getTaggerDataInputStream(TaggerConfig.java:741)
        at edu.stanford.nlp.tagger.maxent.MaxentTagger.readModelAndInit(MaxentTagger.java:643)
        at edu.stanford.nlp.tagger.maxent.MaxentTagger.<init>(MaxentTagger.java:268)
        at edu.stanford.nlp.tagger.maxent.MaxentTagger.<init>(MaxentTagger.java:228)
        at edu.stanford.nlp.pipeline.POSTaggerAnnotator.loadModel(POSTaggerAnnotator.java:57)
        at edu.stanford.nlp.pipeline.POSTaggerAnnotator.<init>(POSTaggerAnnotator.java:44)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP$4.create(StanfordCoreNLP.java:441)
        ... 7 more
Java Result: 1

java nlp stanford-nlp

karthi 19.02.2011 источник

comment

проверьте, находится ли файл Main.properties в доступном пути. - ray 05.05.2012

Ответы (3)

arrow_upward
2
arrow_downward

Эта ошибка просто означает, что программа не находит модели данных, необходимые для запуска. Они должны быть в вашем пути к классам. Если вы находитесь в каталоге дистрибутива, вы можете сделать это с помощью такой команды:

java -cp stanford-corenlp-2010-11-12.jar:stanford-corenlp-models-2010-11-06.jar:xom.jar:jgrapht.jar -Xmx3g edu.stanford.nlp.pipeline.StanfordCoreNLP -annotators tokenize,ssplit,pos,lemma,ner,parse,dcoref -file input.txt

Второй jar-файл содержит модели. Если вы используете Windows, замените двоеточия в команде выше точками с запятой.

Christopher Manning 25.04.2011

comment

Что, если я использую его в веб-приложении, обрабатывающем некоторые строки? Я включил Jar в свою сборку, но все равно получаю именно эту ошибку! - Priyanshu Jha; 22.03.2013

comment

Вам нужно, чтобы jar-файл с моделями находился в вашем CLASSPATH во время выполнения. - Christopher Manning; 23.03.2013

comment

@ChristopherManning Какая польза от xom.jar ?? - Ramana Venkata; 05.06.2013

comment

xom предназначен для вывода XML. Это библиотека Java DOM. Если вам не нужен вывод XML, он вам не нужен. - Christopher Manning; 06.06.2013

arrow_upward
1
arrow_downward

Я сделал следующее:

используйте импорты, предложенные пользователем Tex в его ответе
проверьте, находится ли файл Main.properties в доступном пути, как сказал Карти.
замените List<Pair<IntTuple, IntTuple>> graph = document.get(CorefGraphAnnotation.class); на Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
Я получил результаты следующим образом. Это достаточно?

{1=CHAIN1-[«Врач» в предложении 1, «врач» в предложении 2, «врач» в предложении 3], 2=CHAIN2-[«другие врачи об этом пациенте» в предложении 1], 4=CHAIN4-["этот пациент" в предложении 1], 5=CHAIN5-["этот" в предложении 2, "случай" в предложении 2], 7=CHAIN7-["имя врача и имена консультантов" в предложении 2, "только имя врача" в предложении 3], 9=CHAIN9-["врач и имена консультантов" в предложении 2], 11=CHAIN11-["имена консультантов" в предложении 2], 13=CHAIN13-["консультанты" в предложении 2]}

aicong32 18.08.2013

arrow_upward
0
arrow_downward

Похоже, структура пакетов немного изменилась по сравнению с версией, использованной как в этой теме, так и во фрагменте кода здесь: http://nlp.stanford.edu/software/corenlp.shtml

Замените импорты на следующие:

import edu.stanford.nlp.trees.semgraph.SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation;
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation;
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefGraphAnnotation;
import edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation;
import edu.stanford.nlp.trees.semgraph.SemanticGraph;
import edu.stanford.nlp.dcoref.CorefChain

Это помогло мне ;-)

Tex 21.05.2012

Получение кореференций с помощью стандартного пакета corenlp

Ответы (3)

Похожие вопросы