Java Code Examples: org.aksw.gerbil.transfer.nif.data.NamedEntity

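All of the examples below revolve around the same small API: a NamedEntity is a marking that combines a start position, a length and one or more entity URIs, and it is attached to a Document. The following minimal sketch (document text and URI are made up; the imports follow the package named in the title and are not verified against a particular GERBIL version) shows the round trip most examples rely on: create a marking, read it back, and recover the mention text from its offsets.

import java.util.List;

import org.aksw.gerbil.transfer.nif.Document;
import org.aksw.gerbil.transfer.nif.data.DocumentImpl;
import org.aksw.gerbil.transfer.nif.data.NamedEntity;

public class NamedEntityIntroSketch {
    public static void main(String[] args) {
        String text = "Barack Obama was born in Hawaii.";
        Document doc = new DocumentImpl(text);
        // A NamedEntity is defined by start offset, length and entity URI(s).
        doc.addMarking(new NamedEntity(text.indexOf("Barack Obama"), "Barack Obama".length(),
                "http://dbpedia.org/resource/Barack_Obama"));

        // Read the marking back and recover the annotated mention from the text.
        List<NamedEntity> entities = doc.getMarkings(NamedEntity.class);
        NamedEntity ne = entities.get(0);
        String mention = text.substring(ne.getStartPosition(),
                ne.getStartPosition() + ne.getLength());
        System.out.println(mention + " -> " + ne.getUris());
    }
}
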
Example 1
@Override
public void endElement(String uri, String localName, String qName)
		throws SAXException {
	if (qName.equalsIgnoreCase(SENTENCE_ELEMENT)) {
		i++;
		documents.add(new DocumentImpl(sentence.toString(),
				"http://senseval" + i, markings));
		sentence = new StringBuilder();
	} else if (qName.equalsIgnoreCase(INSTANCE_ELEMENT)) {
		markings.add(new NamedEntity(start, length, instanceUri));
		start = sentence.length();
	} else if (qName.equalsIgnoreCase(WF_ELEMENT)) {
		start = sentence.length();
	}
	this.field = 0;
}
 
Example 2
@Test
public void test() throws GerbilException, IOException {
	SensevalDataset data = new SensevalDataset(this.file);
	data.init();
	List<Document> documents = data.getInstances();
	Document doc = documents.get(docIndex);
	assertEquals(expectedSentence, doc.getText());
	List<Marking> markings = doc.getMarkings();
	String[] marks = new String[markings.size()];
	for(int i=0; i<markings.size();i++){
		NamedEntity entity = ((NamedEntity)markings.get(i));
		marks[i]=doc.getText().substring(entity.getStartPosition(), 
				entity.getStartPosition()+entity.getLength());
	}
	assertArrayEquals(expectedMarkings, marks);
	data.close();
}
 
Example 3
@Test
public void test() {
    ConfidenceScoreEvaluatorDecorator<NamedEntity> decorator = new ConfidenceScoreEvaluatorDecorator<NamedEntity>(
            this, EVALUTION_RESULT_NAME, this);
    EvaluationResultContainer results = new EvaluationResultContainer();
    decorator.evaluate(annotatorResults, new ArrayList<List<NamedEntity>>(), results);
    boolean evaluationResultFound = false;
    boolean scoreThresholdFound = false;
    for (EvaluationResult result : results.getResults()) {
        if (EVALUTION_RESULT_NAME.equals(result.getName())) {
            evaluationResultFound = true;
            Assert.assertEquals(expectedScore, result.getValue());
        }
        if (ConfidenceScoreEvaluatorDecorator.CONFIDENCE_SCORE_THRESHOLD_RESULT_NAME.equals(result.getName())) {
            scoreThresholdFound = true;
            Assert.assertEquals(expectedThreshold, result.getValue());
        }
    }
    Assert.assertTrue(evaluationResultFound);
    if (expectedThreshold != null) {
        Assert.assertTrue(scoreThresholdFound);
    }
}
 
Example 4
@Override
public void evaluate(List<List<NamedEntity>> annotatorResults, List<List<NamedEntity>> goldStandard,
        EvaluationResultContainer results) {
    // all gold standards in this test are empty
    Assert.assertEquals(0, goldStandard.size());
    // simply count all correct named entities
    int score = 0, sum = 0;
    for (List<NamedEntity> nes : annotatorResults) {
        for (NamedEntity ne : nes) {
            if (ne.containsUri(CORRECT_MARKING)) {
                ++score;
            }
            ++sum;
        }
    }
    if (sum == 0) {
        results.addResult(new DoubleEvaluationResult(EVALUTION_RESULT_NAME, 0));
    } else {
        results.addResult(new DoubleEvaluationResult(EVALUTION_RESULT_NAME, (double) score / (double) sum));
    }
}
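
The scoring in Example 4 boils down to counting the NamedEntitys whose URI set contains the expected URI via containsUri. A tiny standalone illustration (the URIs below are made-up stand-ins for CORRECT_MARKING): one of two markings matches, so the resulting score is 0.5.

import java.util.Arrays;
import java.util.List;

import org.aksw.gerbil.transfer.nif.data.NamedEntity;

public class ScoreSketch {
    public static void main(String[] args) {
        String correct = "http://example.org/correct"; // stand-in for CORRECT_MARKING
        List<NamedEntity> nes = Arrays.asList(
                new NamedEntity(0, 5, correct),
                new NamedEntity(10, 4, "http://example.org/other"));
        int score = 0, sum = 0;
        for (NamedEntity ne : nes) {
            if (ne.containsUri(correct)) {
                ++score;
            }
            ++sum;
        }
        System.out.println((double) score / (double) sum); // 0.5
    }
}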
 
Example 5
public AnnotatorConfiguration loadAnnotatorFile(String annotatorFileName, boolean eraseConfidenceValues)
        throws GerbilException {
    Dataset dataset = (new NIFFileDatasetConfig("ANNOTATOR", annotatorFileName, false, EXPERIMENT_TYPE, null, null))
            .getDataset(EXPERIMENT_TYPE);
    List<Document> instances;
    if (eraseConfidenceValues) {
        instances = new ArrayList<Document>(dataset.size());
        Document newDoc;
        for (Document originalDoc : dataset.getInstances()) {
            newDoc = new DocumentImpl();
            newDoc.setDocumentURI(originalDoc.getDocumentURI());
            newDoc.setText(originalDoc.getText());
            for (NamedEntity ne : originalDoc.getMarkings(NamedEntity.class)) {
                newDoc.addMarking(new NamedEntity(ne.getStartPosition(), ne.getLength(), ne.getUris()));
            }
            instances.add(newDoc);
        }
    } else {
        instances = dataset.getInstances();
    }
    return new TestAnnotatorConfiguration(instances, ExperimentType.A2KB);
}
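
Worth noting about Example 5: erasing the confidence values works because the copy goes through the plain three-argument NamedEntity constructor, so a ScoredNamedEntity in the source document becomes an unscored marking in the copy. A minimal sketch of that effect (text, URI and score are made up; the ScoredNamedEntity import assumes the same data package as NamedEntity):

import org.aksw.gerbil.transfer.nif.Document;
import org.aksw.gerbil.transfer.nif.data.DocumentImpl;
import org.aksw.gerbil.transfer.nif.data.NamedEntity;
import org.aksw.gerbil.transfer.nif.data.ScoredNamedEntity;

public class EraseConfidenceSketch {
    public static void main(String[] args) {
        Document original = new DocumentImpl("Barack Obama visited Berlin.");
        // The annotator attached a confidence score to its marking.
        original.addMarking(new ScoredNamedEntity(0, 12,
                "http://dbpedia.org/resource/Barack_Obama", 0.87));

        // Copy the document as in Example 5: only position, length and URIs survive.
        Document copy = new DocumentImpl();
        copy.setDocumentURI(original.getDocumentURI());
        copy.setText(original.getText());
        for (NamedEntity ne : original.getMarkings(NamedEntity.class)) {
            copy.addMarking(new NamedEntity(ne.getStartPosition(), ne.getLength(), ne.getUris()));
        }
        System.out.println(copy.getMarkings()); // a plain NamedEntity, no score
    }
}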
 
Example 6
@Before
public void createDoc() {
	agdistis = new AgdistisWrapper();
	String q = "Who are the successors of Barack Obama and Michelle Obama?";
	String namedEntity1 = "Barack Obama";
	String namedEntity2 = "Michelle Obama";
	Document doc = new DocumentImpl(q);
	NamedEntity obama = new NamedEntity(q.indexOf(namedEntity1), namedEntity1.length(), "someUri", true);
	NamedEntity michelle = new NamedEntity(q.indexOf(namedEntity2), namedEntity2.length(), "someUri2", true);
	doc.addMarking(obama);
	doc.addMarking(michelle);
	testDoc = doc;
}
 
Example 7
@Test
public void testAgdistis() {
	String testDocNif = NifEverything.getInstance().writeNIF(testDoc);
	System.out.println(testDocNif);
	String q = testDoc.getText();
	Document doc = new DocumentImpl(q);
	String namedEntity1 = "Barack Obama";
	String namedEntity2 = "Michelle Obama";
	NamedEntity obama = new NamedEntity(q.indexOf(namedEntity1), namedEntity1.length(), "http://dbpedia.org/resource/Barack_Obama");
	NamedEntity michelle = new NamedEntity(q.indexOf(namedEntity2), namedEntity2.length(), "http://dbpedia.org/resource/Michelle_Obama");
	doc.addMarking(obama);
	doc.addMarking(michelle);
	String shouldBeNif = NifEverything.getInstance().writeNIF(doc);
	Assert.assertTrue("Should be: \n" + shouldBeNif + "\n but was\n" + testDocNif, shouldBeNif.equals(agdistis.process(testDocNif)));
}
 
Example 8
private List<Marking> findMarkings(String[] text, File annFile) throws GerbilException {
	List<Marking> markings = new ArrayList<Marking>();
	try (BufferedReader breader = new BufferedReader(new InputStreamReader(
			new FileInputStream(annFile), Charset.forName("UTF-8")))) {
		String line;
		
		while ((line = breader.readLine()) != null) {
			if(line.isEmpty()){
				continue;
			}
			String[] annotation = line.split("\t");
			int searchID = getTrecID(text[0]);
			int annoID = getTrecID(annotation[0]);
			if(searchID == annoID){
				int start = text[1].indexOf(annotation[3]);
				int length = annotation[3].length();

				// FIXME time consuming!
				String freebaseID = annotation[2].substring(1).replace("/", ".");
				Query query = QueryFactory.create(queryTemp.replace("%%v%%", freebaseID));
				QueryExecution qexec = QueryExecutionFactory.createServiceRequest(DBPEDIA_SERVICE, query);
				String uri = qexec.execSelect().next().getResource("s").getURI();

				markings.add(new NamedEntity(start, length, uri));
			}
			else if(annoID > searchID){
				//There is no annotation for the given text
				break;
			}
		}
	} catch (IOException e) {
		throw new GerbilException("Exception while reading dataset.", e,
				ErrorTypes.DATASET_LOADING_ERROR);
	}

	return markings;
}
 
Example 9
/**
 * Merge {@link NamedEntity}s that are sub spans of another named entity and
 * that have the same URIs.
 * 
 * @param document
 */
public static void mergeSubNamedEntity(Document document) {
	List<NamedEntity> spanList = document.getMarkings(NamedEntity.class);
	NamedEntity nes[] = spanList.toArray(new NamedEntity[spanList.size()]);
	Arrays.sort(nes, new LengthBasedSpanComparator());
	Set<Marking> markingsToRemove = new HashSet<Marking>();
	boolean uriOverlapping;
	Iterator<String> uriIterator;
	for (int i = 0; i < nes.length; ++i) {
		uriOverlapping = false;
		for (int j = i + 1; (j < nes.length) && (!uriOverlapping); ++j) {
			// if nes[i] is a "sub span" of nes[j]
			if ((nes[i].getStartPosition() >= nes[j].getStartPosition()) && ((nes[i].getStartPosition()
					+ nes[i].getLength()) <= (nes[j].getStartPosition() + nes[j].getLength()))) {
				uriOverlapping = false;
				uriIterator = nes[i].getUris().iterator();
				while ((!uriOverlapping) && (uriIterator.hasNext())) {
					uriOverlapping = nes[j].containsUri(uriIterator.next());
				}
				if (uriOverlapping) {
					// merge the URIs of the sub span into the containing entity
					nes[j].getUris().addAll(nes[i].getUris());
					markingsToRemove.add(nes[i]);
				} else {
					LOGGER.debug("There are two overlapping named entities with different URI sets. {}, {}", nes[i],
							nes[j]);
				}
			}
		}
	}
	document.getMarkings().removeAll(markingsToRemove);
}
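
A hedged usage sketch for the merge method above, assuming the main method below sits in the same class as mergeSubNamedEntity (text and URIs are made up): the shorter "Obama" span lies inside the "Barack Obama" span and shares its URI, so after the call it should have been merged away and only the longer marking should remain.

public static void main(String[] args) {
	String text = "Barack Obama spoke in Berlin.";
	Document doc = new DocumentImpl(text);
	// The sub span (7, 5) = "Obama" lies inside (0, 12) = "Barack Obama"
	// and both markings point to the same URI.
	doc.addMarking(new NamedEntity(0, 12, "http://dbpedia.org/resource/Barack_Obama"));
	doc.addMarking(new NamedEntity(7, 5, "http://dbpedia.org/resource/Barack_Obama"));

	mergeSubNamedEntity(doc);

	// Expected: the sub span has been removed, only the longer marking is left.
	System.out.println(doc.getMarkings());
}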
 
Example 10
protected Document createDocument(String fileName, String text, Set<IITB_Annotation> annotations) {
    String documentUri = generateDocumentUri(fileName);
    List<Marking> markings = new ArrayList<Marking>(annotations.size());
    int endPosition;
    Set<String> uris;
    for (IITB_Annotation annotation : annotations) {
        endPosition = annotation.offset + annotation.length;
        if ((annotation.offset > 0) && (Character.isAlphabetic(text.charAt(annotation.offset - 1)))) {
            LOGGER.warn("In document " + documentUri + ", the named entity \""
                    + text.substring(annotation.offset, annotation.offset + annotation.length)
                    + "\" has an alphabetic character in front of it (\"" + text.charAt(annotation.offset - 1)
                    + "\").");
        }
        if (Character.isWhitespace(text.charAt(annotation.offset))) {
            LOGGER.warn("In document " + documentUri + ", the named entity \""
                    + text.substring(annotation.offset, endPosition) + "\" starts with a whitespace.");
        }
        if ((endPosition < text.length()) && Character.isAlphabetic(text.charAt(endPosition))) {
            LOGGER.warn("In document " + documentUri + ", the named entity \""
                    + text.substring(annotation.offset, endPosition)
                    + "\" has an alphabetic character directly behind it (\"" + text.charAt(endPosition) + "\").");
        }
        if (Character.isWhitespace(text.charAt(endPosition - 1))) {
            LOGGER.warn("In document " + documentUri + ", the named entity \""
                    + text.substring(annotation.offset, annotation.offset + annotation.length)
                    + "\" ends with a whitespace.");
        }
        uris = WikipediaHelper.generateUriSet(annotation.wikiTitle);
        if (uris.size() == 0) {
            uris.add(generateEntityUri());
        }
        markings.add(new NamedEntity(annotation.offset, annotation.length, uris));
    }
    return new DocumentImpl(text, documentUri, markings);
}
 
Example 11
private void parseAnnotation(JSONObject entityObject, Document resultDoc) {
    if (entityObject.has(ANNOTATION_TITLE_KEY) && entityObject.has(START_KEY) && entityObject.has(END_KEY)) {
        String uri = transformTitleToUri(entityObject.getString(ANNOTATION_TITLE_KEY));
        int start = entityObject.getInt(START_KEY);
        int end = entityObject.getInt(END_KEY);
        if (entityObject.has(ANNOTATION_GOODNESS_KEY)) {
            resultDoc.addMarking(new ScoredNamedEntity(start, end - start, uri,
                    entityObject.getDouble(ANNOTATION_GOODNESS_KEY)));
        } else {
            resultDoc.addMarking(new NamedEntity(start, end - start, uri));
        }
    }
}
 
Example 12
public static Document reduceToTextAndEntities(Document document) {
    MarkingFilter<TypedNamedEntity> filter = new TypeBasedMarkingFilter<TypedNamedEntity>(false,
            RDFS.Class.getURI(), OWL.Class.getURI());
    List<TypedNamedEntity> namedEntities = document.getMarkings(TypedNamedEntity.class);
    List<Marking> markings = new ArrayList<Marking>(namedEntities.size());
    for (TypedNamedEntity tne : namedEntities) {
        if (filter.isMarkingGood(tne)) {
            markings.add(new NamedEntity(tne.getStartPosition(), tne.getLength(), tne.getUris()));
        }
    }
    return new DocumentImpl(document.getText(), document.getDocumentURI(), markings);
}
 
Example 13
private static void loadExpectedSet() {
    
    assertThat(EXPECTED_DOCUMENTS, is(nullValue()));
    
    EXPECTED_DOCUMENTS = new ArrayList<>();
    
    assertThat(EXPECTED_DOCUMENTS, is(notNullValue()));
    assertThat(EXPECTED_DOCUMENTS.size(), is(0));
    
    List<String> text = new ArrayList<>();
    List<List<Marking>> markings = new ArrayList<>();
    
    text.add("..TREC-1.adobe indian houses..TREC-2.atypical squamous cells..TREC-3.battles in the civil war..TREC-4.becoming a paralegal..TREC-5.best long term care insurance..TREC-6.blue throated hummingbird..TREC-7.bowflex power pro..TREC-8.brooks brothers clearance..TREC-9.butter and margarine..TREC-10.california franchise tax board..TREC-11.cass county missouri..TREC-12.civil right movement..TREC-13.condos in florida..TREC-14.culpeper national cemetery..TREC-15.dangers of asbestos..TREC-16.designer dog breeds..TREC-17.discovery channel store..TREC-18.dog clean up bags..TREC-19.dogs for adoption..TREC-20.dutchess county tourism..TREC-21.earn money at home..TREC-22.east ridge high school..TREC-23.electronic skeet shoot..TREC-24.equal opportunity employer..TREC-25.er tv show..TREC-26.fact on uranus..TREC-27.fickle creek farm..TREC-28.french lick resort and casino..TREC-29.furniture for small spaces..TREC-30.gmat prep classes..TREC-31.gs pay rate..TREC-32.how to build a fence..TREC-33.hp mini 2140..TREC-34.illinois state tax..TREC-35.income tax return online..TREC-36.indiana child support..");
    
    markings.add(Arrays.asList(
        (Marking) new NamedEntity(203, 7, "https://www.googleapis.com/freebase/m/04cnvy"),
        (Marking) new NamedEntity(229, 15, "https://www.googleapis.com/freebase/m/03d452"),
        (Marking) new NamedEntity(333, 20, "https://www.googleapis.com/freebase/m/0nfgq"),
        (Marking) new NamedEntity(393, 5, "https://www.googleapis.com/freebase/m/020ys5"),
        (Marking) new NamedEntity(403, 7, "https://www.googleapis.com/freebase/m/02xry"),
        (Marking) new NamedEntity(420, 26, "https://www.googleapis.com/freebase/m/0c4tkd"),
        (Marking) new NamedEntity(601, 15, "https://www.googleapis.com/freebase/m/0dc3_"),
        (Marking) new NamedEntity(662, 22, "https://www.googleapis.com/freebase/m/03ck4lv"),
        (Marking) new NamedEntity(662, 22, "https://www.googleapis.com/freebase/m/027311j"),
        (Marking) new NamedEntity(662, 22, "https://www.googleapis.com/freebase/m/0bs8gsb"),
        (Marking) new NamedEntity(762, 2, "https://www.googleapis.com/freebase/m/0180mw"),
        (Marking) new NamedEntity(833, 29, "https://www.googleapis.com/freebase/m/02761b3"),
        (Marking) new NamedEntity(872, 9, "https://www.googleapis.com/freebase/m/0c_jw"),
        (Marking) new NamedEntity(913, 4, "https://www.googleapis.com/freebase/m/065y10k"),
        (Marking) new NamedEntity(1008, 14, "https://www.googleapis.com/freebase/m/03v0t"),
        (Marking) new NamedEntity(1070, 7, "https://www.googleapis.com/freebase/m/03v1s")
    ));
    
    for (int i = 0; i < text.size(); i++){
        EXPECTED_DOCUMENTS.add(new DocumentImpl(text.get(i), DOCUMENT_URI.get(i), markings.get(i)));
    }
    
}
 
Example 14
@Test
public void test() {
    List<Marking> markings = DerczynskiDataset.findMarkings(text);
    Assert.assertNotNull(markings);
    Assert.assertTrue(markings.size() > 0);
    Assert.assertTrue(markings.get(0) instanceof NamedEntity);
    NamedEntity ne = (NamedEntity) markings.get(0);
    String mention = tweet.substring(ne.getStartPosition(), ne.getStartPosition() + ne.getLength());
    Assert.assertEquals(expectedToken, mention);
}
 
Example 15
@Test
public void test() {
    List<Marking> markings = RitterDataset.findMarkings(text);
    Assert.assertNotNull(markings);
    Assert.assertTrue(markings.size() > 0);
    Assert.assertTrue(markings.get(0) instanceof NamedEntity);
    TypedNamedEntity ne = (TypedNamedEntity) markings.get(0);
    Assert.assertEquals(expectedToken[1], ne.getTypes().iterator().next());
    String mention = tweet.substring(ne.getStartPosition(), ne.getStartPosition() + ne.getLength());
    Assert.assertEquals(expectedToken[0], mention);
}
 
Example 16
@Test
public void checkLoadDatasets() throws Exception {
    File file = File.createTempFile("GERDAQ", ".xml");
    FileUtils.write(file,
            "<?xml version='1.0' encoding='UTF-8'?>" + String.format("%n")
                    + "<dataset><instance>loris <annotation rank_0_id=\"44017\" rank_0_score=\"0.925555555556\" rank_0_title=\"Candle\">candle</annotation> sampler</instance><instance><annotation rank_0_id=\"230699\" rank_0_score=\"0.666666666667\" rank_0_title=\"Conveyancing\">buying land</annotation> and <annotation rank_0_id=\"21883824\" rank_0_score=\"1.0\" rank_0_title=\"Arizona\">arizona</annotation></instance><instance>hip gry pl</instance></dataset>",
            StandardCharsets.UTF_8.toString());
    String docUriStart = GERDAQDataset.generateDocumentUri(DATASET_NAME, file.getName());

    List<Document> expectedDocuments = Arrays.asList(
            new DocumentImpl("loris candle sampler", docUriStart + 0,
                    Arrays.asList(new NamedEntity(6, 6, "http://dbpedia.org/resource/Candle"))),
            new DocumentImpl("buying land and arizona", docUriStart + 1,
                    Arrays.asList(new NamedEntity(0, 11, "http://dbpedia.org/resource/Conveyancing"),
                            new NamedEntity(16, 7, "http://dbpedia.org/resource/Arizona"))),
            new DocumentImpl("hip gry pl", docUriStart + 2, new ArrayList<Marking>(0)));

    GERDAQDataset dataset = new GERDAQDataset(file.getAbsolutePath());
    try {
        dataset.setName(DATASET_NAME);
        dataset.init();

        Assert.assertArrayEquals(expectedDocuments.toArray(new Document[3]),
                dataset.getInstances().toArray(new Document[3]));
    } finally {
        dataset.close();
    }
}
 
Example 17
@Test
public void test() {
    List<Marking> markings = UMBCDataset.findMarkings(text);
    Assert.assertNotNull(markings);
    Assert.assertTrue(markings.size() > 0);
    Assert.assertTrue(markings.get(0) instanceof NamedEntity);
    NamedEntity ne = (NamedEntity) markings.get(0);
    String mention = tweet.substring(ne.getStartPosition(), ne.getStartPosition() + ne.getLength());
    Assert.assertEquals(expectedToken, mention);
}
 
Example 18
@Parameters
public static Collection<Object[]> data() {
    List<Object[]> testConfigs = new ArrayList<Object[]>();
    // The extractor returns nothing
    testConfigs.add(new Object[] { Arrays.asList(new Span[0]), new Span[0] });
    testConfigs.add(new Object[] { Arrays.asList(new SpanImpl(0, 5), new SpanImpl(2, 2)),
            new Span[] { new SpanImpl(0, 5) } });
    testConfigs.add(new Object[] { Arrays.asList(new SpanImpl(0, 5), new SpanImpl(0, 3)),
            new Span[] { new SpanImpl(0, 5) } });
    testConfigs.add(new Object[] { Arrays.asList(new SpanImpl(0, 5), new SpanImpl(2, 3)),
            new Span[] { new SpanImpl(0, 5) } });
    testConfigs.add(new Object[] { Arrays.asList(new SpanImpl(0, 5), new SpanImpl(2, 6)),
            new Span[] { new SpanImpl(0, 5), new SpanImpl(2, 6) } });
    testConfigs.add(new Object[] { Arrays.asList(new SpanImpl(0, 5), new SpanImpl(2, 6), new SpanImpl(1, 3)),
            new Span[] { new SpanImpl(0, 5), new SpanImpl(2, 6) } });
    testConfigs.add(new Object[] { Arrays.asList(new SpanImpl(2, 3), new SpanImpl(0, 5), new SpanImpl(1, 3)),
            new Span[] { new SpanImpl(0, 5) } });
    testConfigs.add(new Object[] {
            Arrays.asList(new TypedSpanImpl(2, 3, new HashSet<String>(Arrays.asList("T1"))), new SpanImpl(0, 5),
                    new SpanImpl(1, 3)),
            new Span[] { new TypedSpanImpl(0, 5, new HashSet<String>(Arrays.asList("T1"))) } });
    testConfigs.add(new Object[] {
            Arrays.asList(new TypedSpanImpl(2, 3, new HashSet<String>(Arrays.asList("T1", "T3"))), new SpanImpl(0,
                    5), new TypedSpanImpl(1, 3, new HashSet<String>(Arrays.asList("T2", "T3")))),
            new Span[] { new TypedSpanImpl(0, 5, new HashSet<String>(Arrays.asList("T1", "T2", "T3"))) } });
    testConfigs.add(new Object[] {
            Arrays.asList(new NamedEntity(2, 3, new HashSet<String>(Arrays.asList("E1", "E3"))),
                    new SpanImpl(0, 5), new NamedEntity(1, 3, new HashSet<String>(Arrays.asList("E2", "E3")))),
            new Span[] { new NamedEntity(0, 5, new HashSet<String>(Arrays.asList("E1", "E2", "E3"))) } });
    testConfigs.add(new Object[] {
            Arrays.asList(new TypedSpanImpl(2, 3, new HashSet<String>(Arrays.asList("T1"))), new SpanImpl(0, 5),
                    new NamedEntity(1, 3, "E1")),
            new Span[] { new TypedNamedEntity(0, 5, "E1", new HashSet<String>(Arrays.asList("T1"))) } });
    return testConfigs;
}
 
Example 19
@Parameters
public static Collection<Object[]> data() {
    List<Object[]> testConfigs = new ArrayList<Object[]>();
    // The extractor returns nothing
    testConfigs.add(new Object[] { new NamedEntity[0][0], 0, null });
    testConfigs.add(new Object[] { new NamedEntity[2][0], 0, null });
    testConfigs.add(new Object[] { new NamedEntity[][] {
            { new ScoredNamedEntity(0, 1, WRONG_MARKING, 0.1), new ScoredNamedEntity(0, 1, CORRECT_MARKING, 0.2),
                    new ScoredNamedEntity(0, 1, CORRECT_MARKING, 0.3) } },
            1.0, new Double(0.1) });
    testConfigs.add(new Object[] { new NamedEntity[][] { { new ScoredNamedEntity(0, 1, WRONG_MARKING, 0.1) },
            { new ScoredNamedEntity(0, 1, CORRECT_MARKING, 0.2),
                    new ScoredNamedEntity(0, 1, CORRECT_MARKING, 0.3) } },
            1.0, new Double(0.1) });
    testConfigs.add(new Object[] { new NamedEntity[][] { { new ScoredNamedEntity(0, 1, CORRECT_MARKING, 0.1),
            new ScoredNamedEntity(0, 1, WRONG_MARKING, 0.2), new ScoredNamedEntity(0, 1, WRONG_MARKING, 0.3) } },
            1.0 / 3.0, new Double(0) });
    testConfigs.add(new Object[] {
            new NamedEntity[][] { { new ScoredNamedEntity(0, 1, CORRECT_MARKING, 0.1),
                    new ScoredNamedEntity(0, 1, WRONG_MARKING, 0.2), new NamedEntity(0, 1, WRONG_MARKING) } },
            1.0 / 3.0, new Double(0) });
    testConfigs.add(new Object[] {
            new NamedEntity[][] { { new ScoredNamedEntity(0, 1, WRONG_MARKING, 0.1),
                    new NamedEntity(0, 1, CORRECT_MARKING), new NamedEntity(0, 1, CORRECT_MARKING) } },
            1.0, new Double(0.1) });
    return testConfigs;
}
 
Example 20
public ConfidenceScoreEvaluatorDecoratorTest(NamedEntity annotatorResults[][], double expectedScore,
        Double expectedThreshold) {
    this.annotatorResults = new ArrayList<List<NamedEntity>>(annotatorResults.length);
    for (int i = 0; i < annotatorResults.length; ++i) {
        this.annotatorResults.add(Arrays.asList(annotatorResults[i]));
    }
    this.expectedScore = expectedScore;
    this.expectedThreshold = expectedThreshold;
}
 
Example 21
@Parameters
public static Collection<Object[]> data() {
	List<Object[]> testConfigs = new ArrayList<Object[]>();
	// The recognizer found everything, but marked the word "Movie"
	// additionally.
	testConfigs.add(new Object[] {
			new Document[] { new DocumentImpl(TEXTS[0], "doc-0",
					Arrays.asList((Marking) new RelationImpl(
							new NamedEntity(0, 22, "http://dbpedia.org/resource/Conor_McGregor"),
							new Annotation("http://dbpedia.org/ontology/trainer"),
							new NamedEntity(35, 48, "http://aksw.org/notInWiki/John_Kavanagh")))) },
			GOLD_STD, Matching.STRONG_ANNOTATION_MATCH, new double[] { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0 } });
	testConfigs.add(new Object[] {
			new Document[] { new DocumentImpl(TEXTS[0], "doc-0",
					Arrays.asList((Marking) new RelationImpl(
							new NamedEntity(35, 48, "http://dbpedia.org/resource/John_Kavanagh"),
							new Annotation("http://dbpedia.org/ontology/trainer"),
							new NamedEntity(0, 22, "http://aksw.org/notInWiki/Conor_McGregor")))) },
			GOLD_STD, Matching.STRONG_ANNOTATION_MATCH, new double[] { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0 } });
	testConfigs
			.add(new Object[] {
					new Document[] {
							new DocumentImpl(TEXTS[0], "doc-0",
									Arrays.asList(
											(Marking) new RelationImpl(
													new NamedEntity(35, 48,
															"http://dbpedia.org/resource/John_Kavanagh"),
													new Annotation("http://dbpedia.org/ontology/trainer"),
													new NamedEntity(0, 22,
															"http://aksw.org/notInWiki/Conor_McGregor")),
											new RelationImpl(
													new NamedEntity(0, 22,
															"http://dbpedia.org/resource/Conor_McGregor"),
													new Annotation("http://dbpedia.org/ontology/trainer"),
													new NamedEntity(35, 48,
															"http://aksw.org/notInWiki/John_Kavanagh")))) },
					GOLD_STD, Matching.STRONG_ANNOTATION_MATCH, new double[] { 0.5, 1.0, 1/1.5, 0.5, 1.0, 1/1.5, 0 } });
	return testConfigs;
}
 
Example 22
@Parameters
public static Collection<Object[]> data() {
	List<Object[]> testConfigs = new ArrayList<Object[]>();
	// The recognizer found everything, but marked the word "Movie"
	// additionally.
	testConfigs.add(new Object[] {
			new Document[] { new DocumentImpl(TEXTS[0], "doc-0",
					Arrays.asList((Marking) new NamedEntity(0, 22, "http://dbpedia.org/resource/Conor_McGregor"),
							new NamedEntity(35, 48, "http://aksw.org/notInWiki/John_Kavanagh"),
							new RelationImpl(new NamedEntity(0, 22, "http://dbpedia.org/resource/Conor_McGregor"),
									new Annotation("http://dbpedia.org/ontology/trainer"),
									new NamedEntity(35, 48, "http://aksw.org/notInWiki/John_Kavanagh")))) },
			GOLD_STD, Matching.STRONG_ANNOTATION_MATCH, new double[] { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0 } });
	testConfigs.add(new Object[] {
			new Document[] { new DocumentImpl(TEXTS[0], "doc-0",
					Arrays.asList((Marking) new RelationImpl(
							new NamedEntity(35, 48, "http://dbpedia.org/resource/John_Kavanagh"),
							new Annotation("http://dbpedia.org/ontology/trainer"),
							new NamedEntity(0, 22, "http://aksw.org/notInWiki/Conor_McGregor")))) },
			GOLD_STD, Matching.STRONG_ANNOTATION_MATCH, new double[] { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0 } });
	testConfigs.add(new Object[] {
			new Document[] { new DocumentImpl(TEXTS[0], "doc-0",
					Arrays.asList((Marking) new NamedEntity(35, 48, "http://aksw.org/notInWiki/John_Kavanagh"),
							new RelationImpl(new NamedEntity(35, 48, "http://aksw.org/notInWiki/John_Kavanagh"),
									new Annotation("http://dbpedia.org/ontology/trainer"),
									new NamedEntity(0, 22, "http://aksw.org/notInWiki/Conor_McGregor")),
							new RelationImpl(new NamedEntity(0, 22, "http://www.wikidata.org/entity/Q5162259"),
									new Annotation("http://dbpedia.org/ontology/trainer"),
									new NamedEntity(35, 48, "http://aksw.org/notInWiki/John_Kavanagh")))) },
			GOLD_STD, Matching.STRONG_ANNOTATION_MATCH, new double[] { 0.75, 0.75, 1/1.5,  0.75, 0.75, 1/1.5, 0 } });
	return testConfigs;
}
 
Example 23
public String NIFGerbil(InputStream input, NEDAlgo_HITS agdistis) throws IOException {
	org.aksw.gerbil.transfer.nif.Document document;
	String nifDocument = "";
	String textWithMentions = "";
	List<MeaningSpan> annotations = new ArrayList<>();
	try {
		document = parser.getDocumentFromNIFStream(input);
		log.info("NIF file coming from GERBIL");
		textWithMentions = nifParser.createTextWithMentions(document.getText(), document.getMarkings(Span.class));
		Document d = textToDocument(textWithMentions);
		agdistis.run(d, null);
		for (NamedEntityInText namedEntity : d.getNamedEntitiesInText()) {
			String disambiguatedURL = namedEntity.getNamedEntityUri();

			if (disambiguatedURL == null || !disambiguatedURL.contains("http")) {
				annotations.add(new NamedEntity(namedEntity.getStartPos(), namedEntity.getLength(), URLDecoder
						.decode("http://aksw.org/notInWiki/" + namedEntity.getSingleWordLabel(), "UTF-8")));
			} else {
				annotations.add(new NamedEntity(namedEntity.getStartPos(), namedEntity.getLength(),
						URLDecoder.decode(namedEntity.getNamedEntityUri(), "UTF-8")));
			}
		}
		document.setMarkings(new ArrayList<Marking>(annotations));
		log.debug("Result: " + document.toString());
		nifDocument = creator.getDocumentAsNIFString(document);
		log.debug(nifDocument);

	} catch (Exception e) {
		log.error("Exception while reading request.", e);
		return "";
	}
	agdistis.close();
	return nifDocument;
}
 
Example 24
public String NIFType(String text, NEDAlgo_HITS agdistis) throws IOException {
	org.aksw.gerbil.transfer.nif.Document document = null;
	String nifDocument = "";
	NIFParser nifParser = new NIFParser();
	String textWithMentions = "";
	List<MeaningSpan> annotations = new ArrayList<>();

	try {
		document = parser.getDocumentFromNIFString(text);
		log.debug("Request: " + document.toString());
		textWithMentions = nifParser.createTextWithMentions(document.getText(), document.getMarkings(Span.class));
		Document d = textToDocument(textWithMentions);
		agdistis.run(d, null);
		for (NamedEntityInText namedEntity : d.getNamedEntitiesInText()) {
			String disambiguatedURL = namedEntity.getNamedEntityUri();

			if (disambiguatedURL == null) {
				annotations.add(new NamedEntity(namedEntity.getStartPos(), namedEntity.getLength(), URLDecoder
						.decode("http://aksw.org/notInWiki/" + namedEntity.getSingleWordLabel(), "UTF-8")));
			} else {
				annotations.add(new NamedEntity(namedEntity.getStartPos(), namedEntity.getLength(),
						URLDecoder.decode(disambiguatedURL, "UTF-8")));
			}
		}
		document.setMarkings(new ArrayList<Marking>(annotations));
		log.debug("Result: " + document.toString());
		nifDocument = creator.getDocumentAsNIFString(document);
	} catch (Exception e) {
		log.error("Exception while reading request.", e);
		return "";
	}
	agdistis.close();
	return nifDocument;
}
 
Example 25
protected static List<Marking> findMarkings(String line[], String text) {
    List<Marking> markings = new ArrayList<Marking>(line.length / 2);
    String textWithoutHashes = null;
    int start, pos;
    IntArrayList hashes = new IntArrayList();
    int end = 0;
    for (int i = FIRST_ANNOTATION_INDEX; i < line.length; i = i + 2) {
        start = text.indexOf(line[i], end);
        // The mentioned entity couldn't be found. Let's search
        // in a text that contains no hashes.
        if (start < 0) {
            if (textWithoutHashes == null) {
                /*
                 * A very simple workaround to search for a mention without
                 * hashes. Note that this only works, if the mention
                 * couldn't be found because the tweet contains hash tags
                 * that should be part of the mentions.
                 */
                pos = text.indexOf('#');
                while (pos >= 0) {
                    hashes.add(pos);
                    pos = text.indexOf('#', pos + 1);
                }
                textWithoutHashes = text.replaceAll("#", "");
            }
            // The search offset might have shifted because the hashes were removed.
            for (int j = 0; (j < hashes.elementsCount) && (hashes.buffer[j] < end); ++j) {
                --end;
            }
            // search again
            start = textWithoutHashes.indexOf(line[i], end);
            if (start >= 0) {
                // Find the start and end positions of the mention inside the
                // original tweet by looking at the list of hash positions.
                end = start + line[i].length();
                for (int j = 0; (j < hashes.elementsCount) && (hashes.buffer[j] < end); ++j) {
                    ++end;
                    if (hashes.buffer[j] < start) {
                        ++start;
                    }
                }
            }
        } else {
            end = start + line[i].length();
        }
        if (start < 0) {
            LOGGER.warn("Couldn't find \"{}\" inside \"{}\". This annotation will be ignored.", line[i], text);
        } else {
            markings.add(new NamedEntity(start, end - start, line[i + 1]));
        }
    }
    return markings;
}
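
The trickiest part of Example 25 is mapping a mention found in the hash-free copy of the tweet back to offsets in the original tweet. The sketch below replays that correction with made-up tweet and mention data, a java.util.List instead of the original IntArrayList, and a standalone helper instead of the full findMarkings method: every '#' in front of the recovered span shifts it by one character, so the resulting span keeps the hash tags, as intended by the workaround.

import java.util.ArrayList;
import java.util.List;

public class HashOffsetSketch {

    /** Returns {start, length} of the mention inside the original tweet, or null. */
    static int[] locate(String tweet, String mention) {
        int start = tweet.indexOf(mention);
        if (start >= 0) {
            return new int[] { start, mention.length() };
        }
        // Record the positions of all '#' characters and search again
        // in a copy of the tweet with the hashes removed.
        List<Integer> hashes = new ArrayList<>();
        for (int pos = tweet.indexOf('#'); pos >= 0; pos = tweet.indexOf('#', pos + 1)) {
            hashes.add(pos);
        }
        String withoutHashes = tweet.replace("#", "");
        start = withoutHashes.indexOf(mention);
        if (start < 0) {
            return null;
        }
        // Map the positions found in the hash-free text back to the original
        // tweet: every hash in front of the span shifts it by one character.
        int end = start + mention.length();
        for (int hash : hashes) {
            if (hash >= end) {
                break;
            }
            ++end;
            if (hash < start) {
                ++start;
            }
        }
        return new int[] { start, end - start };
    }

    public static void main(String[] args) {
        String tweet = "go #NewYork #Knicks go";
        int[] span = locate(tweet, "NewYork Knicks");
        // Prints "#NewYork #Knicks": the recovered span keeps the hash tags,
        // just like the workaround in Example 25.
        System.out.println(tweet.substring(span[0], span[0] + span[1]));
    }
}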
 
Example 26
public NamedEntity toNamedEntity() {
	return new NamedEntity(startPosition, length, uris);
}
 
Example 27
public NamedEntity translate(it.unipi.di.acube.batframework.data.Annotation annotation) {
    return new NamedEntity(annotation.getPosition(), annotation.getLength(), translateWId(annotation.getConcept()));
}
 
Example 28
public NamedEntity translate(it.unipi.di.acube.batframework.data.ScoredAnnotation annotation) {
    return new ScoredNamedEntity(annotation.getPosition(), annotation.getLength(),
            translateWId(annotation.getConcept()), annotation.getScore());
}
 
Example 29
protected void parseMarkings(JSONObject outObj, Document resultDoc) throws GerbilException {
    try {
        if (outObj != null && outObj.has("mentions")) {
            JSONArray mentions = outObj.getJSONArray("mentions");
            if (mentions != null) {
                JSONObject mention, bestEntity;
                int offset, length;
                Set<String> uris;
                double confidence;
                for (int i = 0; i < mentions.length(); ++i) {
                    mention = mentions.getJSONObject(i);
                    if (mention != null && mention.has("bestEntity") && mention.has("offset")
                            && mention.has("length")) {
                        offset = mention.getInt("offset");
                        length = mention.getInt("length");
                        bestEntity = mention.getJSONObject("bestEntity");
                        uris = null;
                        confidence = -1;
                        if (bestEntity != null && bestEntity.has("kbIdentifier")) {
                            uris = generateUriSet(bestEntity.getString("kbIdentifier"));
                            if (bestEntity.has("disambiguationScore")) {
                                confidence = bestEntity.getDouble("disambiguationScore");
                            }
                            if (uris != null) {
                                if (confidence > -1) {
                                    resultDoc.addMarking(new ScoredNamedEntity(offset, length, uris, confidence));
                                } else {
                                    resultDoc.addMarking(new NamedEntity(offset, length, uris));
                                }
                            }
                        } else {
                            LOGGER.warn("Got an incomplete mention from AIDA: {}. It will be ignored",
                                    mention.toString());
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        throw new GerbilException("Got an Exception while parsing the response of AIDA.", e,
                ErrorTypes.UNEXPECTED_EXCEPTION);
    }
}
 
Example 30
public TypeExtractionResult(List<NamedEntity> types, TypingInfo typeInfo) {
    super();
    this.types = types;
    this.typeInfo = typeInfo;
}