// 78 lines, 2.7 KiB, Scala
package guessNNprepare.mains
|
|
|
|
import guessNNprepare.NamedEntity
|
|
import net.sourceforge.argparse4j.impl.Arguments
|
|
import net.sourceforge.argparse4j.inf.{ArgumentParser, Namespace}
|
|
import org.apache.jena.query.QueryExecutionFactory
|
|
import org.apache.jena.rdf.model.{InfModel, Model, ModelFactory}
|
|
import org.apache.jena.reasoner.ReasonerRegistry
|
|
import org.json.JSONArray
|
|
|
|
import java.io.{FileWriter, InputStream}
|
|
|
|
/**
 * CLI command that extracts the "guessable" named entities (countries, cities,
 * continents and seas) from the Mondial dataset bundled on the classpath and
 * saves them to a JSON file as a single JSON array.
 */
object ExtractEntities extends MainCommand {

  /** One-line summary of the command; also used as the argument parser description. */
  override def description: String = "Save the list of guessable entities to a json file"

  /**
   * Registers the command-line arguments of this command.
   *
   * @param parser the parser to configure; receives one positional argument,
   *               `json_file`, whose parent directory must be writable
   */
  override def addCliArgs(parser: ArgumentParser): Unit = {
    parser.description(description)
    parser
      .addArgument("json_file")
      .`type`(Arguments.fileType().verifyCanWriteParent())
      .help("Where the guessable entities will be saved")
  }

  /**
   * Loads the Mondial dataset, collects the guessable entities and writes them
   * to the file given by the `json_file` argument.
   *
   * Terminates the JVM with exit status 1 if the dataset resource cannot be
   * opened, or if one of the entity types yields no result.
   *
   * @param ARGS parsed command-line arguments; `json_file` is read from it
   */
  override def execute(ARGS: Namespace): Unit = {
    // Imported locally so this command does not depend on edits to the file header.
    import java.nio.charset.StandardCharsets
    import java.nio.file.{Files, Paths}

    val jsonFilePath = ARGS.getString("json_file")

    println("Loading knowledge graph...")
    val kg: InfModel = ModelFactory.createInfModel(ReasonerRegistry.getTransitiveReasoner, ModelFactory.createDefaultModel)
    val mondialDataset: InputStream =
      Option(ClassLoader.getSystemResourceAsStream("mondial_2022_04_04.n3")).getOrElse {
        System.err.println("Impossible to open dataset")
        // sys.exit has type Nothing, so no placeholder null is needed here.
        sys.exit(1)
      }
    // Always release the resource stream, even if parsing fails.
    try {
      kg.read(mondialDataset, null, "TTL")
    } finally {
      mondialDataset.close()
    }
    printf("Loaded %s triples\n", kg.listStatements().toList.size)

    // Same types and same order as before; duplicates are dropped according to
    // NamedEntity's own equality.
    val guessableEntities: List[NamedEntity] =
      List("Country", "City", "Continent", "Sea")
        .flatMap(entityType => getMondialNamedEntities(kg, entityType))
        .distinct
    printf("Found %d guessable named entities\n", guessableEntities.size)

    println(s"Writing entities to ${jsonFilePath}")
    val jsonEntities: JSONArray = new JSONArray()
    guessableEntities.foreach(e => jsonEntities.put(e.json))
    // Write explicitly as UTF-8 instead of the platform default charset, so
    // non-ASCII names are encoded the same way on every machine.
    val writer = Files.newBufferedWriter(Paths.get(jsonFilePath), StandardCharsets.UTF_8)
    try {
      jsonEntities.write(writer)
    } finally {
      // close() flushes first, and runs even if serialization throws.
      writer.close()
    }

    println("Done.")
  }

  /**
   * Queries the knowledge graph for every resource of the given Mondial type
   * that has a Mondial `name` property.
   *
   * Terminates the JVM with exit status 1 if the query returns no row.
   *
   * @param kg           the model to query
   * @param prefixedType local name of the type in the Mondial meta namespace
   *                     (e.g. "Country"); it is interpolated verbatim into the
   *                     SPARQL query, so it must be a plain local name
   * @return one [[NamedEntity]] per (entity, name) row, in result-set order
   */
  def getMondialNamedEntities(kg: Model, prefixedType: String): List[NamedEntity] = {
    //noinspection HttpUrlsUsage
    val mondialPrefix = "http://www.semwebtech.org/mondial/10/meta#"
    val fullType = mondialPrefix + prefixedType
    val nameProperty = mondialPrefix + "name"
    val queryString = s"SELECT ?entity ?name WHERE { ?entity a <${fullType}> ; <${nameProperty}> ?name. }"

    val queryExecution = QueryExecutionFactory.create(queryString, kg)
    val result: List[NamedEntity] =
      try {
        val resultSet = queryExecution.execSelect()
        // A builder gives O(1) appends; `list :+ x` in a loop is quadratic.
        val entities = List.newBuilder[NamedEntity]
        while (resultSet.hasNext) {
          val solution = resultSet.next()
          entities += new NamedEntity(
            solution.getResource("?entity"),
            solution.getLiteral("?name").getString,
            prefixedType
          )
        }
        entities.result()
      } finally {
        // The result set is fully consumed above, so the execution can be released.
        queryExecution.close()
      }

    if (result.isEmpty) {
      System.err.printf("Error: found no named entities of type %s\n", prefixedType)
      System.exit(1)
    }
    result
  }
}
|