Java源码示例:org.aksw.gerbil.transfer.nif.data.NamedEntity
示例1
/**
 * SAX callback fired when a closing tag is reached. Completes the current
 * sentence into a document, records a pending instance annotation, or updates
 * the start offset after a word-form element.
 */
@Override
public void endElement(String uri, String localName, String qName)
        throws SAXException {
    if (qName.equalsIgnoreCase(SENTENCE_ELEMENT)) {
        // A sentence is complete: wrap it into a document with a generated URI.
        ++i;
        documents.add(new DocumentImpl(sentence.toString(), "http://senseval" + i, markings));
        sentence = new StringBuilder();
    } else if (qName.equalsIgnoreCase(INSTANCE_ELEMENT)) {
        // Close the annotated instance and remember where the next span may start.
        markings.add(new NamedEntity(start, length, instanceUri));
        start = sentence.length();
    } else if (qName.equalsIgnoreCase(WF_ELEMENT)) {
        start = sentence.length();
    }
    this.field = 0;
}
示例2
/**
 * Loads the Senseval dataset, picks the document under test and compares the
 * surface forms of all its markings against the expected mentions.
 */
@Test
public void test() throws GerbilException, IOException {
    SensevalDataset data = new SensevalDataset(this.file);
    data.init();
    List<Document> documents = data.getInstances();
    Document doc = documents.get(docIndex);
    assertEquals(expectedSentence, doc.getText());
    List<Marking> markings = doc.getMarkings();
    String text = doc.getText();
    String[] mentions = new String[markings.size()];
    int pos = 0;
    for (Marking marking : markings) {
        NamedEntity entity = (NamedEntity) marking;
        int begin = entity.getStartPosition();
        mentions[pos++] = text.substring(begin, begin + entity.getLength());
    }
    assertArrayEquals(expectedMarkings, mentions);
    data.close();
}
示例3
/**
 * Runs the decorated evaluator and checks that the expected evaluation result
 * (and, if configured, the confidence score threshold result) is reported.
 */
@Test
public void test() {
    ConfidenceScoreEvaluatorDecorator<NamedEntity> decorator =
            new ConfidenceScoreEvaluatorDecorator<NamedEntity>(this, EVALUTION_RESULT_NAME, this);
    EvaluationResultContainer results = new EvaluationResultContainer();
    decorator.evaluate(annotatorResults, new ArrayList<List<NamedEntity>>(), results);
    boolean foundEvalResult = false;
    boolean foundThreshold = false;
    for (EvaluationResult result : results.getResults()) {
        String name = result.getName();
        if (EVALUTION_RESULT_NAME.equals(name)) {
            foundEvalResult = true;
            Assert.assertEquals(expectedScore, result.getValue());
        }
        if (ConfidenceScoreEvaluatorDecorator.CONFIDENCE_SCORE_THRESHOLD_RESULT_NAME.equals(name)) {
            foundThreshold = true;
            Assert.assertEquals(expectedThreshold, result.getValue());
        }
    }
    Assert.assertTrue(foundEvalResult);
    // The threshold result is only expected for configurations that define one.
    if (expectedThreshold != null) {
        Assert.assertTrue(foundThreshold);
    }
}
示例4
/**
 * Counts how many returned named entities carry the correct URI and reports
 * the ratio as a double-valued evaluation result (0 when nothing was returned).
 */
@Override
public void evaluate(List<List<NamedEntity>> annotatorResults, List<List<NamedEntity>> goldStandard,
        EvaluationResultContainer results) {
    // all gold standards in this test are empty
    Assert.assertEquals(0, goldStandard.size());
    // simply count all correct named entities
    int correct = 0;
    int total = 0;
    for (List<NamedEntity> entities : annotatorResults) {
        for (NamedEntity entity : entities) {
            if (entity.containsUri(CORRECT_MARKING)) {
                ++correct;
            }
            ++total;
        }
    }
    // Guard against division by zero when no entities were returned at all.
    double score = (total == 0) ? 0 : ((double) correct / (double) total);
    results.addResult(new DoubleEvaluationResult(EVALUTION_RESULT_NAME, score));
}
示例5
/**
 * Loads an annotator's NIF result file as a list of documents, optionally
 * stripping confidence scores by copying every marking into a plain
 * {@link NamedEntity}.
 *
 * @param annotatorFileName the NIF file to load
 * @param eraseConfidenceValues whether confidence values should be dropped
 * @return a test annotator configuration wrapping the loaded documents
 * @throws GerbilException if the dataset couldn't be loaded
 */
public AnnotatorConfiguration loadAnnotatorFile(String annotatorFileName, boolean eraseConfidenceValues)
        throws GerbilException {
    Dataset dataset = (new NIFFileDatasetConfig("ANNOTATOR", annotatorFileName, false, EXPERIMENT_TYPE, null, null))
            .getDataset(EXPERIMENT_TYPE);
    List<Document> instances;
    if (!eraseConfidenceValues) {
        instances = dataset.getInstances();
    } else {
        instances = new ArrayList<Document>(dataset.size());
        for (Document originalDoc : dataset.getInstances()) {
            Document copy = new DocumentImpl();
            copy.setDocumentURI(originalDoc.getDocumentURI());
            copy.setText(originalDoc.getText());
            // Re-creating every marking as a plain NamedEntity drops any
            // confidence value attached to the original marking.
            for (NamedEntity ne : originalDoc.getMarkings(NamedEntity.class)) {
                copy.addMarking(new NamedEntity(ne.getStartPosition(), ne.getLength(), ne.getUris()));
            }
            instances.add(copy);
        }
    }
    return new TestAnnotatorConfiguration(instances, ExperimentType.A2KB);
}
示例6
/**
 * Builds the test document containing the two named entities "Barack Obama"
 * and "Michelle Obama" before each test run.
 */
@Before
public void createDoc() {
    agdistis = new AgdistisWrapper();
    String question = "Who are the successors of Barack Obama and Michelle Obama?";
    String firstMention = "Barack Obama";
    String secondMention = "Michelle Obama";
    Document document = new DocumentImpl(question);
    document.addMarking(
            new NamedEntity(question.indexOf(firstMention), firstMention.length(), "someUri", true));
    document.addMarking(
            new NamedEntity(question.indexOf(secondMention), secondMention.length(), "someUri2", true));
    testDoc = document;
}
示例7
/**
 * Serializes the test document to NIF, lets the AGDISTIS wrapper process it
 * and compares the outcome with a manually disambiguated reference document.
 */
@Test
public void testAgdistis() {
    String testDocNif = NifEverything.getInstance().writeNIF(testDoc);
    System.out.println(testDocNif);
    String text = testDoc.getText();
    String firstMention = "Barack Obama";
    String secondMention = "Michelle Obama";
    Document expectedDoc = new DocumentImpl(text);
    expectedDoc.addMarking(new NamedEntity(text.indexOf(firstMention), firstMention.length(),
            "http://dbpedia.org/resource/Barack_Obama"));
    expectedDoc.addMarking(new NamedEntity(text.indexOf(secondMention), secondMention.length(),
            "http://dbpedia.org/resource/Michelle_Obama"));
    String shouldBeNif = NifEverything.getInstance().writeNIF(expectedDoc);
    Assert.assertTrue("Should be: \n" + shouldBeNif + "\n but was\n" + testDocNif,
            shouldBeNif.equals(agdistis.process(testDocNif)));
}
示例8
/**
 * Searches the given annotation file for annotations that belong to the given
 * text (identified by its TREC ID) and converts them into {@link NamedEntity}
 * markings. The freebase ID of each annotation is mapped to a URI through a
 * SPARQL request against the DBpedia service.
 *
 * @param text text[0] is expected to hold the TREC ID, text[1] the text itself
 * @param annFile tab-separated annotation file; the early-break below assumes
 *            it is sorted by TREC ID — TODO confirm
 * @return the markings found for the given text (may be empty)
 * @throws GerbilException if the annotation file couldn't be read
 */
private List<Marking> findMarkings(String[] text, File annFile) throws GerbilException {
List<Marking> markings = new ArrayList<Marking>();
try (BufferedReader breader = new BufferedReader(new InputStreamReader(
new FileInputStream(annFile), Charset.forName("UTF-8")))) {
String line;
while ((line = breader.readLine()) != null) {
if(line.isEmpty()){
continue;
}
// Columns (tab separated): [0] TREC id, [2] freebase id, [3] mention text.
String[] annotation = line.split("\t");
int searchID = getTrecID(text[0]);
int annoID = getTrecID(annotation[0]);
if(searchID == annoID){
int start = text[1].indexOf(annotation[3]);
int length = annotation[3].length();
//FIXME time consuming! One SPARQL request is sent per annotation.
// Turn "/m/xyz" into "m.xyz" before inserting it into the query template.
String freebaseID = annotation[2].substring(1, annotation[2].length()).replace("/",".");
Query query = QueryFactory.create(queryTemp.replace("%%v%%", freebaseID));
QueryExecution qexec = QueryExecutionFactory.createServiceRequest(DBPEDIA_SERVICE, query);
String uri = qexec.execSelect().next().getResource("s").getURI();
markings.add(new NamedEntity(start, length, uri));
}
else if(annoID > searchID){
//There is no annotation for the given text (IDs already passed ours).
break;
}
}
} catch (IOException e) {
throw new GerbilException("Exception while reading dataset.", e,
ErrorTypes.DATASET_LOADING_ERROR);
}
return markings;
}
示例9
/**
 * Merges {@link NamedEntity}s that are sub spans of another named entity and
 * that share at least one URI. The shorter entity's URIs are added to the
 * enclosing entity and the shorter entity is removed from the document.
 *
 * @param document the document whose named entity markings are merged in place
 */
public static void mergeSubNamedEntity(Document document) {
    List<NamedEntity> spanList = document.getMarkings(NamedEntity.class);
    NamedEntity nes[] = spanList.toArray(new NamedEntity[spanList.size()]);
    // Sort by length so that a sub span is always visited before any longer
    // span that could contain it.
    Arrays.sort(nes, new LengthBasedSpanComparator());
    Set<Marking> markingsToRemove = new HashSet<Marking>();
    boolean uriOverlapping;
    Iterator<String> uriIterator;
    for (int i = 0; i < nes.length; ++i) {
        uriOverlapping = false;
        for (int j = i + 1; (j < nes.length) && (!uriOverlapping); ++j) {
            // if nes[i] is a "sub span" of nes[j]
            if ((nes[i].getStartPosition() >= nes[j].getStartPosition()) && ((nes[i].getStartPosition()
                    + nes[i].getLength()) <= (nes[j].getStartPosition() + nes[j].getLength()))) {
                // Check whether the two entities share at least one URI.
                uriIterator = nes[i].getUris().iterator();
                while ((!uriOverlapping) && (uriIterator.hasNext())) {
                    uriOverlapping = nes[j].containsUri(uriIterator.next());
                }
                if (uriOverlapping) {
                    // FIX: the original added nes[j]'s URI set to itself (a
                    // no-op); the sub span's URIs have to be merged instead.
                    nes[j].getUris().addAll(nes[i].getUris());
                    markingsToRemove.add(nes[i]);
                } else {
                    LOGGER.debug("There are two overlapping named entities with different URI sets. {}, {}", nes[i],
                            nes[j]);
                }
            }
        }
    }
    document.getMarkings().removeAll(markingsToRemove);
}
示例10
/**
 * Creates a document from an IITB corpus entry: every annotation is converted
 * into a {@link NamedEntity} marking. Annotations whose boundaries cut through
 * words or touch whitespace are only reported as warnings, not repaired.
 *
 * @param fileName corpus file name, used to derive the document URI
 * @param text the document text
 * @param annotations the annotations belonging to this document
 * @return the assembled document
 */
protected Document createDocument(String fileName, String text, Set<IITB_Annotation> annotations) {
String documentUri = generateDocumentUri(fileName);
List<Marking> markings = new ArrayList<Marking>(annotations.size());
int endPosition;
Set<String> uris;
for (IITB_Annotation annotation : annotations) {
endPosition = annotation.offset + annotation.length;
// Warn if the annotation starts in the middle of a word.
if ((annotation.offset > 0) && (Character.isAlphabetic(text.charAt(annotation.offset - 1)))) {
LOGGER.warn("In document " + documentUri + ", the named entity \""
+ text.substring(annotation.offset, annotation.offset + annotation.length)
+ "\" has an alphabetic character in front of it (\"" + text.charAt(annotation.offset - 1)
+ "\").");
}
// Warn if the annotation begins with whitespace.
if (Character.isWhitespace(text.charAt(annotation.offset))) {
LOGGER.warn("In document " + documentUri + ", the named entity \""
+ text.substring(annotation.offset, endPosition) + "\" starts with a whitespace.");
}
// Warn if the annotation ends in the middle of a word.
if ((endPosition < text.length()) && Character.isAlphabetic(text.charAt(endPosition))) {
LOGGER.warn("In document " + documentUri + ", the named entity \""
+ text.substring(annotation.offset, endPosition)
+ "\" has an alphabetic character directly behind it (\"" + text.charAt(endPosition) + "\").");
}
// NOTE(review): this assumes endPosition <= text.length(); an annotation
// reaching beyond the text would throw here — confirm the corpus
// guarantees in-bounds annotations.
if (Character.isWhitespace(text.charAt(endPosition - 1))) {
LOGGER.warn("In document " + documentUri + ", the named entity \""
+ text.substring(annotation.offset, annotation.offset + annotation.length)
+ "\" ends with a whitespace.");
}
// Fall back to a generated entity URI when the wiki title yields none.
uris = WikipediaHelper.generateUriSet(annotation.wikiTitle);
if (uris.size() == 0) {
uris.add(generateEntityUri());
}
markings.add(new NamedEntity(annotation.offset, annotation.length, uris));
}
return new DocumentImpl(text, documentUri, markings);
}
示例11
/**
 * Converts a single JSON annotation object into a (scored) named entity and
 * adds it to the result document. Objects missing title, start or end key are
 * silently ignored.
 */
private void parseAnnotation(JSONObject entityObject, Document resultDoc) {
    if (!entityObject.has(ANNOTATION_TITLE_KEY) || !entityObject.has(START_KEY) || !entityObject.has(END_KEY)) {
        return;
    }
    String uri = transformTitleToUri(entityObject.getString(ANNOTATION_TITLE_KEY));
    int start = entityObject.getInt(START_KEY);
    // The API reports start/end offsets; markings carry start/length.
    int length = entityObject.getInt(END_KEY) - start;
    if (entityObject.has(ANNOTATION_GOODNESS_KEY)) {
        resultDoc.addMarking(
                new ScoredNamedEntity(start, length, uri, entityObject.getDouble(ANNOTATION_GOODNESS_KEY)));
    } else {
        resultDoc.addMarking(new NamedEntity(start, length, uri));
    }
}
示例12
/**
 * Creates a copy of the document that keeps only its text, its URI and those
 * typed named entities that are neither RDFS nor OWL classes; the kept
 * entities are copied as plain {@link NamedEntity}s (their types are dropped).
 */
public static Document reduceToTextAndEntities(Document document) {
    MarkingFilter<TypedNamedEntity> filter = new TypeBasedMarkingFilter<TypedNamedEntity>(false,
            RDFS.Class.getURI(), OWL.Class.getURI());
    List<TypedNamedEntity> typedEntities = document.getMarkings(TypedNamedEntity.class);
    List<Marking> keptMarkings = new ArrayList<Marking>(typedEntities.size());
    for (TypedNamedEntity entity : typedEntities) {
        if (!filter.isMarkingGood(entity)) {
            continue;
        }
        keptMarkings.add(new NamedEntity(entity.getStartPosition(), entity.getLength(), entity.getUris()));
    }
    return new DocumentImpl(document.getText(), document.getDocumentURI(), keptMarkings);
}
示例13
/**
 * Lazily builds the list of expected documents for the TREC test data. Must
 * only be called while EXPECTED_DOCUMENTS is still null.
 */
private static void loadExpectedSet() {
    assertThat(EXPECTED_DOCUMENTS, is(nullValue()));
    EXPECTED_DOCUMENTS = new ArrayList<>();
    assertThat(EXPECTED_DOCUMENTS, is(notNullValue()));
    assertThat(EXPECTED_DOCUMENTS.size(), is(0));
    List<String> text = new ArrayList<>();
    List<List<Marking>> markings = new ArrayList<>();
    text.add("..TREC-1.adobe indian houses..TREC-2.atypical squamous cells..TREC-3.battles in the civil war..TREC-4.becoming a paralegal..TREC-5.best long term care insurance..TREC-6.blue throated hummingbird..TREC-7.bowflex power pro..TREC-8.brooks brothers clearance..TREC-9.butter and margarine..TREC-10.california franchise tax board..TREC-11.cass county missouri..TREC-12.civil right movement..TREC-13.condos in florida..TREC-14.culpeper national cemetery..TREC-15.dangers of asbestos..TREC-16.designer dog breeds..TREC-17.discovery channel store..TREC-18.dog clean up bags..TREC-19.dogs for adoption..TREC-20.dutchess county tourism..TREC-21.earn money at home..TREC-22.east ridge high school..TREC-23.electronic skeet shoot..TREC-24.equal opportunity employer..TREC-25.er tv show..TREC-26.fact on uranus..TREC-27.fickle creek farm..TREC-28.french lick resort and casino..TREC-29.furniture for small spaces..TREC-30.gmat prep classes..TREC-31.gs pay rate..TREC-32.how to build a fence..TREC-33.hp mini 2140..TREC-34.illinois state tax..TREC-35.income tax return online..TREC-36.indiana child support..");
    markings.add(Arrays.asList(
            (Marking) new NamedEntity(203, 7, "https://www.googleapis.com/freebase/m/04cnvy"),
            (Marking) new NamedEntity(229, 15, "https://www.googleapis.com/freebase/m/03d452"),
            (Marking) new NamedEntity(333, 20, "https://www.googleapis.com/freebase/m/0nfgq"),
            (Marking) new NamedEntity(393, 5, "https://www.googleapis.com/freebase/m/020ys5"),
            (Marking) new NamedEntity(403, 7, "https://www.googleapis.com/freebase/m/02xry"),
            (Marking) new NamedEntity(420, 26, "https://www.googleapis.com/freebase/m/0c4tkd"),
            (Marking) new NamedEntity(601, 15, "https://www.googleapis.com/freebase/m/0dc3_"),
            (Marking) new NamedEntity(662, 22, "https://www.googleapis.com/freebase/m/03ck4lv"),
            (Marking) new NamedEntity(662, 22, "https://www.googleapis.com/freebase/m/027311j"),
            (Marking) new NamedEntity(662, 22, "https://www.googleapis.com/freebase/m/0bs8gsb"),
            (Marking) new NamedEntity(762, 2, "https://www.googleapis.com/freebase/m/0180mw"),
            (Marking) new NamedEntity(833, 29, "https://www.googleapis.com/freebase/m/02761b3"),
            (Marking) new NamedEntity(872, 9, "https://www.googleapis.com/freebase/m/0c_jw"),
            (Marking) new NamedEntity(913, 4, "https://www.googleapis.com/freebase/m/065y10k"),
            (Marking) new NamedEntity(1008, 14, "https://www.googleapis.com/freebase/m/03v0t"),
            (Marking) new NamedEntity(1070, 7, "https://www.googleapis.com/freebase/m/03v1s")
    ));
    // FIX: removed the redundant "EXPECTED_DOCUMENTS = new ArrayList<>();"
    // reassignment which discarded the (still empty) list created and checked
    // above. Also generalized the loop bound from the hard-coded 1 to
    // text.size() so further test documents can be added without touching the
    // loop.
    for (int i = 0; i < text.size(); i++) {
        EXPECTED_DOCUMENTS.add(new DocumentImpl(text.get(i), DOCUMENT_URI.get(i), markings.get(i)));
    }
}
示例14
/**
 * Checks that the first marking found by the Derczynski dataset parser is a
 * named entity whose surface form in the tweet matches the expected token.
 */
@Test
public void test() {
    List<Marking> markings = DerczynskiDataset.findMarkings(text);
    Assert.assertNotNull(markings);
    Assert.assertFalse(markings.isEmpty());
    Marking first = markings.get(0);
    Assert.assertTrue(first instanceof NamedEntity);
    NamedEntity entity = (NamedEntity) first;
    int begin = entity.getStartPosition();
    Assert.assertEquals(expectedToken, tweet.substring(begin, begin + entity.getLength()));
}
示例15
/**
 * Checks that the first marking found by the Ritter dataset parser is a typed
 * named entity with the expected type and surface form.
 */
public void test() {
    List<Marking> markings = RitterDataset.findMarkings(text);
    Assert.assertNotNull(markings);
    Assert.assertTrue(markings.size() > 0);
    Assert.assertTrue(markings.get(0) instanceof NamedEntity);
    TypedNamedEntity ne = (TypedNamedEntity) markings.get(0);
    // FIX: the original called equals() and discarded the boolean result, so a
    // wrong type could never fail the test. Assert the comparison instead.
    Assert.assertEquals(expectedToken[1], ne.getTypes().iterator().next());
    String mention = tweet.substring(ne.getStartPosition(), ne.getStartPosition() + ne.getLength());
    Assert.assertEquals(expectedToken[0], mention);
    // NOTE(review): this method carries no @Test annotation (its siblings do)
    // — confirm whether it is intentionally disabled before adding one.
}
示例16
/**
 * Writes a small GERDAQ XML file to a temporary location, loads it through
 * {@link GERDAQDataset} and checks that the three parsed documents — two with
 * named entity markings, one without — match the expected documents.
 */
@Test
public void checkLoadDatasets() throws Exception {
File file = File.createTempFile("GERDAQ", ".xml");
FileUtils.write(file,
"<?xml version='1.0' encoding='UTF-8'?>" + String.format("%n")
+ "<dataset><instance>loris <annotation rank_0_id=\"44017\" rank_0_score=\"0.925555555556\" rank_0_title=\"Candle\">candle</annotation> sampler</instance><instance><annotation rank_0_id=\"230699\" rank_0_score=\"0.666666666667\" rank_0_title=\"Conveyancing\">buying land</annotation> and <annotation rank_0_id=\"21883824\" rank_0_score=\"1.0\" rank_0_title=\"Arizona\">arizona</annotation></instance><instance>hip gry pl</instance></dataset>",
StandardCharsets.UTF_8.toString());
String docUriStart = GERDAQDataset.generateDocumentUri(DATASET_NAME, file.getName());
// Expected: annotations become DBpedia entities; the third instance has none.
List<Document> expectedDocuments = Arrays.asList(
new DocumentImpl("loris candle sampler", docUriStart + 0,
Arrays.asList(new NamedEntity(6, 6, "http://dbpedia.org/resource/Candle"))),
new DocumentImpl("buying land and arizona", docUriStart + 1,
Arrays.asList(new NamedEntity(0, 11, "http://dbpedia.org/resource/Conveyancing"),
new NamedEntity(16, 7, "http://dbpedia.org/resource/Arizona"))),
new DocumentImpl("hip gry pl", docUriStart + 2, new ArrayList<Marking>(0)));
GERDAQDataset dataset = new GERDAQDataset(file.getAbsolutePath());
try {
dataset.setName(DATASET_NAME);
dataset.init();
Assert.assertArrayEquals(expectedDocuments.toArray(new Document[3]),
dataset.getInstances().toArray(new Document[3]));
} finally {
// Always release the dataset, even when an assertion fails.
dataset.close();
}
}
示例17
/**
 * Checks that the first marking found by the UMBC dataset parser is a named
 * entity whose surface form in the tweet matches the expected token.
 */
@Test
public void test() {
    List<Marking> foundMarkings = UMBCDataset.findMarkings(text);
    Assert.assertNotNull(foundMarkings);
    Assert.assertTrue(foundMarkings.size() > 0);
    Assert.assertTrue(foundMarkings.get(0) instanceof NamedEntity);
    NamedEntity namedEntity = (NamedEntity) foundMarkings.get(0);
    int from = namedEntity.getStartPosition();
    int to = from + namedEntity.getLength();
    Assert.assertEquals(expectedToken, tweet.substring(from, to));
}
示例18
/**
 * Test configurations for span merging: each entry pairs a list of (typed /
 * entity) spans with the array of spans expected after overlapping sub spans
 * have been merged into the enclosing span.
 */
@Parameters
public static Collection<Object[]> data() {
List<Object[]> testConfigs = new ArrayList<Object[]>();
// The extractor returns nothing
testConfigs.add(new Object[] { Arrays.asList(new Span[0]), new Span[0] });
// Sub spans fully inside (0,5) are merged away.
testConfigs.add(new Object[] { Arrays.asList(new SpanImpl(0, 5), new SpanImpl(2, 2)),
new Span[] { new SpanImpl(0, 5) } });
testConfigs.add(new Object[] { Arrays.asList(new SpanImpl(0, 5), new SpanImpl(0, 3)),
new Span[] { new SpanImpl(0, 5) } });
testConfigs.add(new Object[] { Arrays.asList(new SpanImpl(0, 5), new SpanImpl(2, 3)),
new Span[] { new SpanImpl(0, 5) } });
// (2,6) only partially overlaps (0,5), so both survive.
testConfigs.add(new Object[] { Arrays.asList(new SpanImpl(0, 5), new SpanImpl(2, 6)),
new Span[] { new SpanImpl(0, 5), new SpanImpl(2, 6) } });
testConfigs.add(new Object[] { Arrays.asList(new SpanImpl(0, 5), new SpanImpl(2, 6), new SpanImpl(1, 3)),
new Span[] { new SpanImpl(0, 5), new SpanImpl(2, 6) } });
testConfigs.add(new Object[] { Arrays.asList(new SpanImpl(2, 3), new SpanImpl(0, 5), new SpanImpl(1, 3)),
new Span[] { new SpanImpl(0, 5) } });
// Merging a typed sub span lifts its types onto the enclosing span.
testConfigs.add(new Object[] {
Arrays.asList(new TypedSpanImpl(2, 3, new HashSet<String>(Arrays.asList("T1"))), new SpanImpl(0, 5),
new SpanImpl(1, 3)),
new Span[] { new TypedSpanImpl(0, 5, new HashSet<String>(Arrays.asList("T1"))) } });
// Type sets of several merged sub spans are unified.
testConfigs.add(new Object[] {
Arrays.asList(new TypedSpanImpl(2, 3, new HashSet<String>(Arrays.asList("T1", "T3"))), new SpanImpl(0,
5), new TypedSpanImpl(1, 3, new HashSet<String>(Arrays.asList("T2", "T3")))),
new Span[] { new TypedSpanImpl(0, 5, new HashSet<String>(Arrays.asList("T1", "T2", "T3"))) } });
// The same unification applies to entity URI sets.
testConfigs.add(new Object[] {
Arrays.asList(new NamedEntity(2, 3, new HashSet<String>(Arrays.asList("E1", "E3"))),
new SpanImpl(0, 5), new NamedEntity(1, 3, new HashSet<String>(Arrays.asList("E2", "E3")))),
new Span[] { new NamedEntity(0, 5, new HashSet<String>(Arrays.asList("E1", "E2", "E3"))) } });
// Mixing a typed span and an entity yields a typed named entity.
testConfigs.add(new Object[] {
Arrays.asList(new TypedSpanImpl(2, 3, new HashSet<String>(Arrays.asList("T1"))), new SpanImpl(0, 5),
new NamedEntity(1, 3, "E1")),
new Span[] { new TypedNamedEntity(0, 5, "E1", new HashSet<String>(Arrays.asList("T1"))) } });
return testConfigs;
}
示例19
/**
 * Test configurations for the confidence score evaluator decorator. Each
 * entry consists of the annotator results, the expected score and the
 * expected confidence threshold (null when no threshold result is expected).
 */
@Parameters
public static Collection<Object[]> data() {
    List<Object[]> testConfigs = new ArrayList<Object[]>();
    // The extractor returns nothing
    testConfigs.add(new Object[] { new NamedEntity[0][0], 0, null });
    testConfigs.add(new Object[] { new NamedEntity[2][0], 0, null });
    // FIX: replaced the deprecated new Double(...) boxing constructor with
    // Double.valueOf(...) throughout (identical equals() semantics).
    testConfigs.add(new Object[] { new NamedEntity[][] {
            { new ScoredNamedEntity(0, 1, WRONG_MARKING, 0.1), new ScoredNamedEntity(0, 1, CORRECT_MARKING, 0.2),
                    new ScoredNamedEntity(0, 1, CORRECT_MARKING, 0.3) } },
            1.0, Double.valueOf(0.1) });
    testConfigs.add(new Object[] { new NamedEntity[][] { { new ScoredNamedEntity(0, 1, WRONG_MARKING, 0.1) },
            { new ScoredNamedEntity(0, 1, CORRECT_MARKING, 0.2),
                    new ScoredNamedEntity(0, 1, CORRECT_MARKING, 0.3) } },
            1.0, Double.valueOf(0.1) });
    testConfigs.add(new Object[] { new NamedEntity[][] { { new ScoredNamedEntity(0, 1, CORRECT_MARKING, 0.1),
            new ScoredNamedEntity(0, 1, WRONG_MARKING, 0.2), new ScoredNamedEntity(0, 1, WRONG_MARKING, 0.3) } },
            1.0 / 3.0, Double.valueOf(0) });
    testConfigs.add(new Object[] {
            new NamedEntity[][] { { new ScoredNamedEntity(0, 1, CORRECT_MARKING, 0.1),
                    new ScoredNamedEntity(0, 1, WRONG_MARKING, 0.2), new NamedEntity(0, 1, WRONG_MARKING) } },
            1.0 / 3.0, Double.valueOf(0) });
    testConfigs.add(new Object[] {
            new NamedEntity[][] { { new ScoredNamedEntity(0, 1, WRONG_MARKING, 0.1),
                    new NamedEntity(0, 1, CORRECT_MARKING), new NamedEntity(0, 1, CORRECT_MARKING) } },
            1.0, Double.valueOf(0.1) });
    return testConfigs;
}
示例20
/**
 * Initializes the test case with the annotator results (converted into lists),
 * the expected evaluation score and the expected confidence threshold.
 */
public ConfidenceScoreEvaluatorDecoratorTest(NamedEntity annotatorResults[][], double expectedScore,
        Double expectedThreshold) {
    this.annotatorResults = new ArrayList<List<NamedEntity>>(annotatorResults.length);
    for (NamedEntity[] resultSet : annotatorResults) {
        this.annotatorResults.add(Arrays.asList(resultSet));
    }
    this.expectedScore = expectedScore;
    this.expectedThreshold = expectedThreshold;
}
示例21
/**
 * Test configurations for relation matching: each entry holds the annotator
 * documents, the gold standard, the matching strategy and the expected
 * evaluation measure values.
 */
@Parameters
public static Collection<Object[]> data() {
List<Object[]> testConfigs = new ArrayList<Object[]>();
// NOTE(review): the original comment here ("recognizer ... marked the word
// 'Movie' additionally") looks copied from another test; the configs below
// actually check relation matching against GOLD_STD.
// Correct relation -> all measures 1.0.
testConfigs.add(new Object[] {
new Document[] { new DocumentImpl(TEXTS[0], "doc-0",
Arrays.asList((Marking) new RelationImpl(
new NamedEntity(0, 22, "http://dbpedia.org/resource/Conor_McGregor"),
new Annotation("http://dbpedia.org/ontology/trainer"),
new NamedEntity(35, 48, "http://aksw.org/notInWiki/John_Kavanagh")))) },
GOLD_STD, Matching.STRONG_ANNOTATION_MATCH, new double[] { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0 } });
// Subject/object swapped -> nothing matches.
testConfigs.add(new Object[] {
new Document[] { new DocumentImpl(TEXTS[0], "doc-0",
Arrays.asList((Marking) new RelationImpl(
new NamedEntity(35, 48, "http://dbpedia.org/resource/John_Kavanagh"),
new Annotation("http://dbpedia.org/ontology/trainer"),
new NamedEntity(0, 22, "http://aksw.org/notInWiki/Conor_McGregor")))) },
GOLD_STD, Matching.STRONG_ANNOTATION_MATCH, new double[] { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0 } });
// One wrong and one correct relation -> partial precision/recall.
testConfigs
.add(new Object[] {
new Document[] {
new DocumentImpl(TEXTS[0], "doc-0",
Arrays.asList(
(Marking) new RelationImpl(
new NamedEntity(35, 48,
"http://dbpedia.org/resource/John_Kavanagh"),
new Annotation("http://dbpedia.org/ontology/trainer"),
new NamedEntity(0, 22,
"http://aksw.org/notInWiki/Conor_McGregor")),
new RelationImpl(
new NamedEntity(0, 22,
"http://dbpedia.org/resource/Conor_McGregor"),
new Annotation("http://dbpedia.org/ontology/trainer"),
new NamedEntity(35, 48,
"http://aksw.org/notInWiki/John_Kavanagh")))) },
GOLD_STD, Matching.STRONG_ANNOTATION_MATCH, new double[] { 0.5, 1.0, 1/1.5, 0.5, 1.0, 1/1.5, 0 } });
return testConfigs;
}
示例22
/**
 * Test configurations mixing plain named entity markings with relation
 * markings: each entry holds the annotator documents, the gold standard, the
 * matching strategy and the expected evaluation measure values.
 */
@Parameters
public static Collection<Object[]> data() {
List<Object[]> testConfigs = new ArrayList<Object[]>();
// NOTE(review): the original comment here ("recognizer ... marked the word
// 'Movie' additionally") looks copied from another test; the configs below
// actually check relation matching against GOLD_STD.
// Correct relation plus its entity markings -> all measures 1.0.
testConfigs.add(new Object[] {
new Document[] { new DocumentImpl(TEXTS[0], "doc-0",
Arrays.asList((Marking) new NamedEntity(0, 22, "http://dbpedia.org/resource/Conor_McGregor"),
new NamedEntity(35, 48, "http://aksw.org/notInWiki/John_Kavanagh"),
new RelationImpl(new NamedEntity(0, 22, "http://dbpedia.org/resource/Conor_McGregor"),
new Annotation("http://dbpedia.org/ontology/trainer"),
new NamedEntity(35, 48, "http://aksw.org/notInWiki/John_Kavanagh")))) },
GOLD_STD, Matching.STRONG_ANNOTATION_MATCH, new double[] { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0 } });
// Subject/object swapped -> nothing matches.
testConfigs.add(new Object[] {
new Document[] { new DocumentImpl(TEXTS[0], "doc-0",
Arrays.asList((Marking) new RelationImpl(
new NamedEntity(35, 48, "http://dbpedia.org/resource/John_Kavanagh"),
new Annotation("http://dbpedia.org/ontology/trainer"),
new NamedEntity(0, 22, "http://aksw.org/notInWiki/Conor_McGregor")))) },
GOLD_STD, Matching.STRONG_ANNOTATION_MATCH, new double[] { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0 } });
// One wrong and one correct relation (Wikidata URI variant) -> partial scores.
testConfigs.add(new Object[] {
new Document[] { new DocumentImpl(TEXTS[0], "doc-0",
Arrays.asList((Marking) new NamedEntity(35, 48, "http://aksw.org/notInWiki/John_Kavanagh"),
new RelationImpl(new NamedEntity(35, 48, "http://aksw.org/notInWiki/John_Kavanagh"),
new Annotation("http://dbpedia.org/ontology/trainer"),
new NamedEntity(0, 22, "http://aksw.org/notInWiki/Conor_McGregor")),
new RelationImpl(new NamedEntity(0, 22, "http://www.wikidata.org/entity/Q5162259"),
new Annotation("http://dbpedia.org/ontology/trainer"),
new NamedEntity(35, 48, "http://aksw.org/notInWiki/John_Kavanagh")))) },
GOLD_STD, Matching.STRONG_ANNOTATION_MATCH, new double[] { 0.75, 0.75, 1/1.5, 0.75, 0.75, 1/1.5, 0 } });
return testConfigs;
}
示例23
/**
 * Processes a NIF document received from GERBIL: runs AGDISTIS disambiguation
 * on the contained mentions and returns the annotated document as a NIF
 * string, or an empty string if anything goes wrong.
 *
 * @param input stream containing the NIF document
 * @param agdistis the disambiguation algorithm; it is closed before returning
 * @return the annotated NIF document, or "" on failure
 * @throws IOException declared for API compatibility
 */
public String NIFGerbil(InputStream input, NEDAlgo_HITS agdistis) throws IOException {
    org.aksw.gerbil.transfer.nif.Document document;
    String nifDocument = "";
    String textWithMentions = "";
    List<MeaningSpan> annotations = new ArrayList<>();
    try {
        document = parser.getDocumentFromNIFStream(input);
        log.info("NIF file coming from GERBIL");
        textWithMentions = nifParser.createTextWithMentions(document.getText(), document.getMarkings(Span.class));
        Document d = textToDocument(textWithMentions);
        agdistis.run(d, null);
        for (NamedEntityInText namedEntity : d.getNamedEntitiesInText()) {
            String disambiguatedURL = namedEntity.getNamedEntityUri();
            // FIX: use the local variable consistently instead of re-calling
            // getNamedEntityUri() (same value; matches the sibling NIFType method).
            if (disambiguatedURL == null || !disambiguatedURL.contains("http")) {
                // No usable URI found: mark the entity as not being in the wiki.
                annotations.add(new NamedEntity(namedEntity.getStartPos(), namedEntity.getLength(), URLDecoder
                        .decode("http://aksw.org/notInWiki/" + namedEntity.getSingleWordLabel(), "UTF-8")));
            } else {
                annotations.add(new NamedEntity(namedEntity.getStartPos(), namedEntity.getLength(),
                        URLDecoder.decode(disambiguatedURL, "UTF-8")));
            }
        }
        document.setMarkings(new ArrayList<Marking>(annotations));
        log.debug("Result: " + document.toString());
        nifDocument = creator.getDocumentAsNIFString(document);
        log.debug(nifDocument);
    } catch (Exception e) {
        log.error("Exception while reading request.", e);
        return "";
    } finally {
        // FIX: close the algorithm on the error path too; previously the early
        // "return \"\"" in the catch block skipped the close() call.
        agdistis.close();
    }
    return nifDocument;
}
示例24
/**
 * Processes a NIF document given as a string: runs AGDISTIS disambiguation on
 * the contained mentions and returns the annotated document as a NIF string,
 * or an empty string if anything goes wrong.
 *
 * @param text the NIF document as a string
 * @param agdistis the disambiguation algorithm; closed on the success path
 * @return the annotated NIF document, or "" on failure
 * @throws IOException declared for API compatibility
 */
public String NIFType(String text, NEDAlgo_HITS agdistis) throws IOException {
org.aksw.gerbil.transfer.nif.Document document = null;
String nifDocument = "";
NIFParser nifParser = new NIFParser();
String textWithMentions = "";
List<MeaningSpan> annotations = new ArrayList<>();
try {
document = parser.getDocumentFromNIFString(text);
log.debug("Request: " + document.toString());
textWithMentions = nifParser.createTextWithMentions(document.getText(), document.getMarkings(Span.class));
Document d = textToDocument(textWithMentions);
agdistis.run(d, null);
for (NamedEntityInText namedEntity : d.getNamedEntitiesInText()) {
String disambiguatedURL = namedEntity.getNamedEntityUri();
if (disambiguatedURL == null) {
// No URI found: mark the entity as not being in the wiki.
annotations.add(new NamedEntity(namedEntity.getStartPos(), namedEntity.getLength(), URLDecoder
.decode("http://aksw.org/notInWiki/" + namedEntity.getSingleWordLabel(), "UTF-8")));
} else {
annotations.add(new NamedEntity(namedEntity.getStartPos(), namedEntity.getLength(),
URLDecoder.decode(disambiguatedURL, "UTF-8")));
}
}
document.setMarkings(new ArrayList<Marking>(annotations));
log.debug("Result: " + document.toString());
nifDocument = creator.getDocumentAsNIFString(document);
} catch (Exception e) {
log.error("Exception while reading request.", e);
// NOTE(review): returning here skips the agdistis.close() call below —
// confirm whether the algorithm should also be closed on the error path.
return "";
}
agdistis.close();
return nifDocument;
}
示例25
/**
 * Parses the annotation columns of a single dataset line and creates named
 * entity markings by locating every mention inside the given text. If a
 * mention can't be found directly, the search is retried on a copy of the
 * text from which all '#' characters have been removed (the dataset sometimes
 * omits hash tags from the mentions) and the offsets are mapped back onto the
 * original text.
 *
 * @param line the split dataset line; mentions and URIs alternate starting at
 *            FIRST_ANNOTATION_INDEX
 * @param text the (tweet) text the mentions are searched in
 * @return the markings that could be located (unfindable mentions are skipped
 *         with a warning)
 */
protected static List<Marking> findMarkings(String line[], String text) {
    List<Marking> markings = new ArrayList<Marking>(line.length / 2);
    String textWithoutHashes = null;
    int start, pos;
    IntArrayList hashes = new IntArrayList();
    int end = 0;
    for (int i = FIRST_ANNOTATION_INDEX; i < line.length; i = i + 2) {
        start = text.indexOf(line[i], end);
        // The mentioned entity couldn't be found. Let's search
        // in a text that contains no hashes.
        if (start < 0) {
            if (textWithoutHashes == null) {
                /*
                 * A very simple workaround to search for a mention without
                 * hashes. Note that this only works, if the mention
                 * couldn't be found because the tweets contains hash tags
                 * that should be part of the mentions.
                 */
                pos = text.indexOf('#');
                while (pos >= 0) {
                    hashes.add(pos);
                    pos = text.indexOf('#', pos + 1);
                }
                textWithoutHashes = text.replaceAll("#", "");
            }
            // The offset might have been moved through the removing of the
            // hashes.
            // FIX: the loop condition erroneously checked "i <
            // hashes.elementsCount" instead of "j < hashes.elementsCount",
            // which could read past the end of the hashes buffer.
            for (int j = 0; (j < hashes.elementsCount) && (hashes.buffer[j] < end); ++j) {
                --end;
            }
            // search again
            start = textWithoutHashes.indexOf(line[i], end);
            if (start >= 0) {
                // find the start and end positions of the mention inside the
                // original tweet by looking at the list of hashes
                end = start + line[i].length();
                for (int j = 0; (j < hashes.elementsCount) && (hashes.buffer[j] < end); ++j) {
                    ++end;
                    if (hashes.buffer[j] < start) {
                        ++start;
                    }
                }
            }
        } else {
            end = start + line[i].length();
        }
        if (start < 0) {
            LOGGER.warn("Couldn't find \"{}\" inside \"{}\". This annotation will be ignored.", line[i], text);
        } else {
            markings.add(new NamedEntity(start, end - start, line[i + 1]));
        }
    }
    return markings;
}
示例26
/**
 * Creates a plain {@link NamedEntity} from this object's position, length and
 * URI set.
 *
 * @return a new named entity covering the same span with the same URIs
 */
public NamedEntity toNamedEntity() {
return new NamedEntity(startPosition, length, uris);
}
示例27
/**
 * Translates a BAT framework annotation into a {@link NamedEntity}, mapping
 * its Wikipedia concept ID to a URI via translateWId.
 *
 * @param annotation the BAT annotation to translate
 * @return the corresponding named entity
 */
public NamedEntity translate(it.unipi.di.acube.batframework.data.Annotation annotation) {
return new NamedEntity(annotation.getPosition(), annotation.getLength(), translateWId(annotation.getConcept()));
}
示例28
/**
 * Translates a scored BAT framework annotation into a
 * {@link ScoredNamedEntity}, preserving the annotation's confidence score.
 *
 * @param annotation the scored BAT annotation to translate
 * @return the corresponding scored named entity
 */
public NamedEntity translate(it.unipi.di.acube.batframework.data.ScoredAnnotation annotation) {
return new ScoredNamedEntity(annotation.getPosition(), annotation.getLength(),
translateWId(annotation.getConcept()), annotation.getScore());
}
示例29
/**
 * Parses the "mentions" array of an AIDA JSON response and adds a (scored)
 * named entity to the result document for every complete mention.
 *
 * @param outObj the JSON object returned by AIDA (may be null)
 * @param resultDoc the document the parsed markings are added to
 * @throws GerbilException if the response couldn't be parsed
 */
protected void parseMarkings(JSONObject outObj, Document resultDoc) throws GerbilException {
    try {
        if (outObj != null && outObj.has("mentions")) {
            JSONArray mentions = outObj.getJSONArray("mentions");
            if (mentions != null) {
                JSONObject mention, bestEntity;
                int offset, length;
                Set<String> uris;
                double confidence;
                for (int i = 0; i < mentions.length(); ++i) {
                    mention = mentions.getJSONObject(i);
                    if (mention != null && mention.has("bestEntity") && mention.has("offset")
                            && mention.has("length")) {
                        offset = mention.getInt("offset");
                        length = mention.getInt("length");
                        bestEntity = mention.getJSONObject("bestEntity");
                        uris = null;
                        confidence = -1;
                        if (bestEntity != null && bestEntity.has("kbIdentifier")) {
                            uris = generateUriSet(bestEntity.getString("kbIdentifier"));
                            // The disambiguation score is optional; -1 marks "absent".
                            if (bestEntity.has("disambiguationScore")) {
                                confidence = bestEntity.getDouble("disambiguationScore");
                            }
                            if (uris != null) {
                                if (confidence > -1) {
                                    resultDoc.addMarking(new ScoredNamedEntity(offset, length, uris, confidence));
                                } else {
                                    resultDoc.addMarking(new NamedEntity(offset, length, uris));
                                }
                            }
                        } else {
                            // FIX: log the incomplete mention itself. This branch is
                            // reached exactly when bestEntity is null or unusable, so
                            // the original bestEntity.toString() call could throw an
                            // NPE while logging.
                            LOGGER.warn("Got an incomplete mention from AIDA: {}. It will be ignored",
                                    mention);
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        throw new GerbilException("Got an Exception while parsing the response of AIDA.", e,
                ErrorTypes.UNEXPECTED_EXCEPTION);
    }
}
示例30
/**
 * Bundles the extracted types with the typing information they originate from.
 *
 * @param types the extracted types, represented as named entities
 * @param typeInfo the typing information describing the extraction
 */
public TypeExtractionResult(List<NamedEntity> types, TypingInfo typeInfo) {
super();
this.types = types;
this.typeInfo = typeInfo;
}