Prepare: getting named entities of different types, without duplicate names
This commit is contained in:
parent
4ff9d983a3
commit
1cfee42c47
|
|
@ -5,7 +5,6 @@ import org.apache.jena.reasoner.ReasonerRegistry
|
|||
import java.io.InputStream
|
||||
|
||||
object Main extends App {
|
||||
|
||||
{
|
||||
println("Loading knowledge graph...")
|
||||
val kg: InfModel = ModelFactory.createInfModel(ReasonerRegistry.getTransitiveReasoner, ModelFactory.createDefaultModel)
|
||||
|
|
@ -17,8 +16,12 @@ object Main extends App {
|
|||
})
|
||||
kg.read(mondialDataset, null, "TTL");
|
||||
printf("Loaded %s triples\n", kg.listStatements().toList.size)
|
||||
val countries = getMondialNamedEntities(kg, "Country")
|
||||
printf("There are %d countries in the dataset\n", countries.size)
|
||||
val guessableEntities = NamedEntitySet() ++
|
||||
getMondialNamedEntities(kg, "Country") ++
|
||||
getMondialNamedEntities(kg, "City") ++
|
||||
getMondialNamedEntities(kg, "Continent") ++
|
||||
getMondialNamedEntities(kg, "Sea")
|
||||
printf("Found %d guessable named entities\n", guessableEntities.set.size)
|
||||
}
|
||||
|
||||
def getMondialNamedEntities(kg: Model, prefixedType: String): List[NamedEntity] = {
|
||||
|
|
@ -27,16 +30,20 @@ object Main extends App {
|
|||
val MONDIAL_PREFIX = "http://www.semwebtech.org/mondial/10/meta#"
|
||||
val fullType = MONDIAL_PREFIX + prefixedType;
|
||||
val nameProperty = MONDIAL_PREFIX + "name"
|
||||
val queryString = String.format("SELECT ?entity ?name WHERE { ?entity a <%s> ; <%s> ?name. }", fullType, nameProperty)
|
||||
val queryString = s"SELECT ?entity ?name WHERE { ?entity a <${fullType}> ; <${nameProperty}> ?name. }"
|
||||
val resultSet = QueryExecutionFactory.create(queryString, kg).execSelect()
|
||||
while (resultSet.hasNext) {
|
||||
val solution = resultSet.next()
|
||||
result = result :+ new NamedEntity(
|
||||
solution.getLiteral("?name").getString,
|
||||
solution.getResource("?entity")
|
||||
solution.getResource("?entity"),
|
||||
prefixedType
|
||||
)
|
||||
}
|
||||
if (result.isEmpty) {
|
||||
System.err.printf("Error: found no named entities of type %s\n", prefixedType)
|
||||
System.exit(1)
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import org.apache.jena.rdf.model.Resource
|
||||
|
||||
class NamedEntity(val Name: String, val entity: Resource) {
|
||||
|
||||
/**
 * A resource paired with the name it is known by and a label for its type.
 *
 * @param Name         name of the entity
 * @param entity       the resource this name belongs to
 * @param readableType label describing what kind of entity this is
 *                     (presumably a short type name such as "Country" -- confirm against callers)
 */
class NamedEntity(
  val Name: String,
  val entity: Resource,
  val readableType: String
)
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,16 @@
|
|||
/**
 * Immutable collection of [[NamedEntity]] values in which no two entities may share a name.
 *
 * @param set the entities currently held; defaults to the empty set
 */
class NamedEntitySet(val set: Set[NamedEntity] = Set()) {

  /**
   * Returns a new [[NamedEntitySet]] holding the current entities plus those in `iterable`.
   *
   * Every incoming name is checked against the names already held and against the
   * names of the incoming entities that precede it, so a name repeated inside a single
   * batch is detected as well. On the first clash an error naming the offending entity
   * is written to stderr and the JVM exits with status 1.
   *
   * @param iterable the entities to add
   * @return a new set containing both the existing and the added entities
   */
  def ++(iterable: Iterable[NamedEntity]): NamedEntitySet = {
    // Materialise once: the input is walked for the check and again for the union,
    // which would silently misbehave for a one-shot iterable.
    val incoming = iterable.toList

    firstRepeatedName(incoming.map(_.Name), set.map(_.Name)).foreach { name =>
      System.err.printf("Error: multiple entities exist with name %s\n", name)
      System.exit(1)
    }

    new NamedEntitySet(set ++ incoming)
  }

  /** Finds the first name in `names` that is already in `seen` or occurred earlier in `names`. */
  @scala.annotation.tailrec
  private def firstRepeatedName(names: List[String], seen: Set[String]): Option[String] =
    names match {
      case Nil                                 => None
      case name :: _ if seen.contains(name)    => Some(name)
      case name :: rest                        => firstRepeatedName(rest, seen + name)
    }
}
|
||||
|
||||
/** Companion providing `new`-free construction of [[NamedEntitySet]]. */
object NamedEntitySet {

  /**
   * Wraps `set` in a [[NamedEntitySet]].
   *
   * @param set the entities to hold; when omitted the result is empty
   * @return a [[NamedEntitySet]] over exactly the given entities
   */
  def apply(set: Set[NamedEntity] = Set.empty): NamedEntitySet = {
    val wrapped = new NamedEntitySet(set)
    wrapped
  }
}
|
||||
Loading…
Reference in New Issue