Prepare: getting named entities of different types, without duplicate names

This commit is contained in:
Francesco 2022-04-08 11:47:57 +02:00
parent 4ff9d983a3
commit 1cfee42c47
3 changed files with 31 additions and 8 deletions

View File

@ -5,7 +5,6 @@ import org.apache.jena.reasoner.ReasonerRegistry
import java.io.InputStream
object Main extends App {
{
println("Loading knowledge graph...")
val kg: InfModel = ModelFactory.createInfModel(ReasonerRegistry.getTransitiveReasoner, ModelFactory.createDefaultModel)
@ -17,8 +16,12 @@ object Main extends App {
})
kg.read(mondialDataset, null, "TTL");
printf("Loaded %s triples\n", kg.listStatements().toList.size)
val countries = getMondialNamedEntities(kg, "Country")
printf("There are %d countries in the dataset\n", countries.size)
val guessableEntities = NamedEntitySet() ++
getMondialNamedEntities(kg, "Country") ++
getMondialNamedEntities(kg, "City") ++
getMondialNamedEntities(kg, "Continent") ++
getMondialNamedEntities(kg, "Sea")
printf("Found %d guessable named entities\n", guessableEntities.set.size)
}
def getMondialNamedEntities(kg: Model, prefixedType: String): List[NamedEntity] = {
@ -27,16 +30,20 @@ object Main extends App {
val MONDIAL_PREFIX = "http://www.semwebtech.org/mondial/10/meta#"
val fullType = MONDIAL_PREFIX + prefixedType;
val nameProperty = MONDIAL_PREFIX + "name"
val queryString = String.format("SELECT ?entity ?name WHERE { ?entity a <%s> ; <%s> ?name. }", fullType, nameProperty)
val queryString = s"SELECT ?entity ?name WHERE { ?entity a <${fullType}> ; <${nameProperty}> ?name. }"
val resultSet = QueryExecutionFactory.create(queryString, kg).execSelect()
while (resultSet.hasNext) {
val solution = resultSet.next()
result = result :+ new NamedEntity(
solution.getLiteral("?name").getString,
solution.getResource("?entity")
solution.getResource("?entity"),
prefixedType
)
}
if (result.isEmpty) {
System.err.printf("Error: found no named entities of type %s\n", prefixedType)
System.exit(1)
}
result
}
}

View File

@ -1,5 +1,5 @@
import org.apache.jena.rdf.model.Resource
class NamedEntity(val Name: String, val entity: Resource) {
class NamedEntity(val Name: String, val entity: Resource, val readableType: String) {
}

View File

@ -0,0 +1,16 @@
/**
 * Immutable collection of [[NamedEntity]] values that refuses additions whose
 * name is already used by an entity it holds.
 *
 * @param set the entities currently held; defaults to the empty set
 */
class NamedEntitySet(val set: Set[NamedEntity] = Set()) {

  /**
   * Returns a new [[NamedEntitySet]] containing the current entities plus
   * every element of `iterable`.
   *
   * If an incoming entity has the same `Name` as an entity already in `set`,
   * an error mentioning that name is written to stderr and the JVM is
   * terminated with exit status 1.
   *
   * NOTE(review): only clashes against names already present in `set` are
   * checked; two entities inside `iterable` that share a name are not
   * detected here. Confirm whether that is intended.
   *
   * @param iterable entities to add
   * @return a new set wrapping `set ++ iterable`
   */
  def ++(iterable: Iterable[NamedEntity]): NamedEntitySet = {
    val nameSet = set.map(_.Name)
    // Report the first incoming entity whose name is already taken, without
    // resorting to Option.get.
    iterable.find(e => nameSet.contains(e.Name)).foreach { duplicate =>
      System.err.printf("Error: multiple entities exist with name %s\n", duplicate.Name)
      System.exit(1)
    }
    NamedEntitySet(set ++ iterable)
  }
}
/** Companion offering a `new`-free factory for [[NamedEntitySet]]. */
object NamedEntitySet {

  /**
   * Builds a [[NamedEntitySet]] wrapping the given entities.
   *
   * @param set initial entities; defaults to the empty set
   * @return a new instance holding exactly `set`
   */
  def apply(set: Set[NamedEntity] = Set()): NamedEntitySet = {
    val instance = new NamedEntitySet(set)
    instance
  }
}