package guessNNprepare.mains

import guessNNprepare.NamedEntity
import net.sourceforge.argparse4j.impl.Arguments
import net.sourceforge.argparse4j.inf.{ArgumentParser, Namespace}
import org.apache.jena.query.QueryExecutionFactory
import org.apache.jena.rdf.model.{InfModel, Model, ModelFactory}
import org.apache.jena.reasoner.ReasonerRegistry
import org.json.JSONArray

import java.io.{FileWriter, InputStream}
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}

/** Command that queries the bundled knowledge graph for named entities of a fixed set of types
  * and saves them, as a JSON array, to the file given on the command line.
  */
object ExtractEntities extends MainCommand {

  override def description: String = "Save the list of guessable entities to a json file"

  /** Registers the single positional `json_file` argument (its parent directory must be writable). */
  override def addCliArgs(parser: ArgumentParser): Unit = {
    parser.description(description)
    parser
      .addArgument("json_file")
      .`type`(Arguments.fileType().verifyCanWriteParent())
      .help("Where the guessable entities will be saved")
  }

  /** Loads the knowledge graph from the classpath, collects the guessable entities and writes
    * them to the `json_file` argument.
    *
    * Terminates the JVM with exit status 1 when the dataset resource cannot be opened, or when
    * one of the queried types yields no entity (see [[getMondialNamedEntities]]).
    *
    * @param ARGS parsed command-line arguments; only `json_file` is read
    */
  override def execute(ARGS: Namespace): Unit = {
    val jsonFilePath = ARGS.getString("json_file")

    println("Loading knowledge graph...")
    val kg: InfModel = ModelFactory.createInfModel(
      ReasonerRegistry.getTransitiveReasoner,
      ModelFactory.createDefaultModel
    )
    // getSystemResourceAsStream returns null when the resource is missing; sys.exit has type
    // Nothing, so no placeholder value is needed after it.
    val mondialDataset: InputStream =
      Option(ClassLoader.getSystemResourceAsStream("mondial_2022_04_04.n3")).getOrElse {
        System.err.println("Impossible to open dataset")
        sys.exit(1)
      }
    // Close the stream even when parsing fails.
    try kg.read(mondialDataset, null, "TTL")
    finally mondialDataset.close()
    printf("Loaded %s triples\n", kg.listStatements().toList.size)

    // Same types, in the same order, as before; duplicates are removed afterwards.
    val guessableEntities: List[NamedEntity] =
      List("Country", "City", "Continent", "Sea")
        .flatMap(entityType => getMondialNamedEntities(kg, entityType))
        .distinct
    printf("Found %d guessable named entities\n", guessableEntities.size)

    println(s"Writing entities to ${jsonFilePath}")
    val jsonEntities: JSONArray = new JSONArray()
    guessableEntities.foreach(e => jsonEntities.put(e.json))

    // Write explicitly as UTF-8 (instead of the platform default charset) so that non-ASCII
    // names survive on every platform, and always release the file handle.
    val writer = Files.newBufferedWriter(Paths.get(jsonFilePath), StandardCharsets.UTF_8)
    try {
      jsonEntities.write(writer)
      writer.flush()
    } finally writer.close()

    println("Done.")
  }

  /** Runs a SPARQL SELECT on `kg` for every resource that has the given type and a `name`
    * property (both resolved against the Mondial meta namespace), and wraps each solution in a
    * [[NamedEntity]].
    *
    * Terminates the JVM with exit status 1 when the query yields no solution.
    *
    * @param kg           model to query
    * @param prefixedType local name of the type inside the Mondial meta namespace, e.g. "Country"
    * @return one entity per query solution, in result-set order
    */
  def getMondialNamedEntities(kg: Model, prefixedType: String): List[NamedEntity] = {
    //noinspection HttpUrlsUsage
    val mondialPrefix = "http://www.semwebtech.org/mondial/10/meta#"
    val fullType = mondialPrefix + prefixedType
    val nameProperty = mondialPrefix + "name"
    val queryString =
      s"SELECT ?entity ?name WHERE { ?entity a <${fullType}> ; <${nameProperty}> ?name. }"

    val queryExecution = QueryExecutionFactory.create(queryString, kg)
    val result: List[NamedEntity] =
      try {
        val resultSet = queryExecution.execSelect()
        // A builder keeps accumulation linear; appending to a List with :+ is O(n) per element.
        val entities = List.newBuilder[NamedEntity]
        while (resultSet.hasNext) {
          val solution = resultSet.next()
          entities += new NamedEntity(
            solution.getResource("?entity"),
            solution.getLiteral("?name").getString,
            prefixedType
          )
        }
        entities.result()
      } finally queryExecution.close() // the result set is fully consumed before closing

    if (result.isEmpty) {
      System.err.printf("Error: found no named entities of type %s\n", prefixedType)
      System.exit(1)
    }
    result
  }
}