Prepare: getting named entities of different types, without duplicate names
This commit is contained in:
parent
4ff9d983a3
commit
1cfee42c47
|
|
@ -5,7 +5,6 @@ import org.apache.jena.reasoner.ReasonerRegistry
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
|
|
||||||
object Main extends App {
|
object Main extends App {
|
||||||
|
|
||||||
{
|
{
|
||||||
println("Loading knowledge graph...")
|
println("Loading knowledge graph...")
|
||||||
val kg: InfModel = ModelFactory.createInfModel(ReasonerRegistry.getTransitiveReasoner, ModelFactory.createDefaultModel)
|
val kg: InfModel = ModelFactory.createInfModel(ReasonerRegistry.getTransitiveReasoner, ModelFactory.createDefaultModel)
|
||||||
|
|
@ -17,8 +16,12 @@ object Main extends App {
|
||||||
})
|
})
|
||||||
kg.read(mondialDataset, null, "TTL");
|
kg.read(mondialDataset, null, "TTL");
|
||||||
printf("Loaded %s triples\n", kg.listStatements().toList.size)
|
printf("Loaded %s triples\n", kg.listStatements().toList.size)
|
||||||
val countries = getMondialNamedEntities(kg, "Country")
|
val guessableEntities = NamedEntitySet() ++
|
||||||
printf("There are %d countries in the dataset\n", countries.size)
|
getMondialNamedEntities(kg, "Country") ++
|
||||||
|
getMondialNamedEntities(kg, "City") ++
|
||||||
|
getMondialNamedEntities(kg, "Continent") ++
|
||||||
|
getMondialNamedEntities(kg, "Sea")
|
||||||
|
printf("Found %d guessable named entities\n", guessableEntities.set.size)
|
||||||
}
|
}
|
||||||
|
|
||||||
def getMondialNamedEntities(kg: Model, prefixedType: String): List[NamedEntity] = {
|
def getMondialNamedEntities(kg: Model, prefixedType: String): List[NamedEntity] = {
|
||||||
|
|
@ -27,16 +30,20 @@ object Main extends App {
|
||||||
val MONDIAL_PREFIX = "http://www.semwebtech.org/mondial/10/meta#"
|
val MONDIAL_PREFIX = "http://www.semwebtech.org/mondial/10/meta#"
|
||||||
val fullType = MONDIAL_PREFIX + prefixedType;
|
val fullType = MONDIAL_PREFIX + prefixedType;
|
||||||
val nameProperty = MONDIAL_PREFIX + "name"
|
val nameProperty = MONDIAL_PREFIX + "name"
|
||||||
val queryString = String.format("SELECT ?entity ?name WHERE { ?entity a <%s> ; <%s> ?name. }", fullType, nameProperty)
|
val queryString = s"SELECT ?entity ?name WHERE { ?entity a <${fullType}> ; <${nameProperty}> ?name. }"
|
||||||
val resultSet = QueryExecutionFactory.create(queryString, kg).execSelect()
|
val resultSet = QueryExecutionFactory.create(queryString, kg).execSelect()
|
||||||
while (resultSet.hasNext) {
|
while (resultSet.hasNext) {
|
||||||
val solution = resultSet.next()
|
val solution = resultSet.next()
|
||||||
result = result :+ new NamedEntity(
|
result = result :+ new NamedEntity(
|
||||||
solution.getLiteral("?name").getString,
|
solution.getLiteral("?name").getString,
|
||||||
solution.getResource("?entity")
|
solution.getResource("?entity"),
|
||||||
|
prefixedType
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
if (result.isEmpty) {
|
||||||
|
System.err.printf("Error: found no named entities of type %s\n", prefixedType)
|
||||||
|
System.exit(1)
|
||||||
|
}
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
import org.apache.jena.rdf.model.Resource
|
import org.apache.jena.rdf.model.Resource
|
||||||
|
|
||||||
class NamedEntity(val Name: String, val entity: Resource) {
|
class NamedEntity(val Name: String, val entity: Resource, val readableType: String) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
/** A collection of named entities in which every entity name is meant to be unique.
  *
  * @param set the entities currently held; defaults to the empty set
  */
class NamedEntitySet(val set: Set[NamedEntity] = Set()) {

  /** Returns a new [[NamedEntitySet]] holding the current entities plus those in `iterable`.
    *
    * If an incoming entity's name is already in use -- either by an entity already in this
    * set or by an earlier entity of `iterable` itself -- an error naming it is printed to
    * stderr and the JVM is terminated with exit status 1.
    *
    * @param iterable the entities to add
    * @return a new set containing the union of `set` and `iterable`
    */
  def ++(iterable: Iterable[NamedEntity]): NamedEntitySet = {
    // Names seen so far: starts with the names already in the set and grows while scanning
    // the incoming entities, so clashes *within* `iterable` are caught as well.
    val seenNames = scala.collection.mutable.HashSet[String]()
    seenNames ++= set.map(_.Name)

    // `add` returns false when the name was already recorded, i.e. on the first clash.
    iterable.find(entity => !seenNames.add(entity.Name)).foreach { clash =>
      System.err.printf("Error: multiple entities exist with name %s\n", clash.Name)
      System.exit(1)
    }

    new NamedEntitySet(set ++ iterable)
  }
}
|
||||||
|
|
||||||
|
/** Companion of the `NamedEntitySet` class, allowing construction without `new`. */
object NamedEntitySet {

  /** Wraps `set` (the empty set when omitted) in a `NamedEntitySet`.
    *
    * @param set the entities the resulting set should hold
    * @return a `NamedEntitySet` holding exactly `set`
    */
  def apply(set: Set[NamedEntity] = Set()): NamedEntitySet = {
    val wrapped = new NamedEntitySet(set)
    wrapped
  }
}
|
||||||
Loading…
Reference in New Issue