Prepare: converting CNN library output to custom output

This commit is contained in:
Francesco 2022-04-18 14:08:36 +02:00
parent 815e9f1b1f
commit 4caad04911
3 changed files with 49 additions and 14 deletions

View File

@ -2,8 +2,9 @@ package guessNNprepare
import org.apache.jena.rdf.model.{InfModel, ModelFactory} import org.apache.jena.rdf.model.{InfModel, ModelFactory}
import org.apache.jena.reasoner.ReasonerRegistry import org.apache.jena.reasoner.ReasonerRegistry
import org.json.JSONArray import org.json.{JSONArray, JSONObject}
import java.io.FileWriter
import java.util.stream.StreamSupport import java.util.stream.StreamSupport
import scala.jdk.StreamConverters.StreamHasToScala import scala.jdk.StreamConverters.StreamHasToScala
import scala.util.{Failure, Success, Try} import scala.util.{Failure, Success, Try}
@ -34,4 +35,21 @@ object Utils {
case Nil => Success(Nil) case Nil => Success(Nil)
} }
/**
 * Evidence that `T` is one of the two org.json container types.
 *
 * The trait is sealed and has exactly two implicit instances below, so a
 * context bound `T: JSONObjectOrArray` only resolves for `JSONObject` and
 * `JSONArray`.
 */
sealed trait JSONObjectOrArray[T]
/** Implicit evidence for `JSONObject`. */
implicit object JSONObjectImplicit extends JSONObjectOrArray[JSONObject]
/** Implicit evidence for `JSONArray`. */
implicit object JSONArrayImplicit extends JSONObjectOrArray[JSONArray]
/**
 * Serialises a JSON value to the file at `path`.
 *
 * The file is opened with `new FileWriter(path)`, so the platform default
 * charset is used.
 * NOTE(review): JSON is conventionally UTF-8; consider passing an explicit
 * charset here once the readers of these files are confirmed to expect it.
 *
 * @tparam T   either `JSONObject` or `JSONArray` (enforced by the context bound)
 * @param json the JSON value to write
 * @param path destination file path
 * @throws java.io.IOException if the file cannot be opened or written
 */
def writeJsonToFile[T: JSONObjectOrArray](json: T, path: String): Unit =
  // Using.resource guarantees the writer is closed even when serialisation
  // throws; the previous flush()/close() sequence leaked the file handle on
  // any exception raised by write().
  scala.util.Using.resource(new FileWriter(path)) { writer =>
    json match {
      case value: JSONObject => value.write(writer)
      case value: JSONArray => value.write(writer)
    }
    writer.flush()
  }
} }

View File

@ -29,7 +29,7 @@ object ComputeConcepts extends MainCommand {
// CLI args // CLI args
val entitiesFilePath = Paths.get(ARGS.getString("guessable_entities")) val entitiesFilePath = Paths.get(ARGS.getString("guessable_entities"))
val guessTarget = ARGS.getString("target") val guessTarget = ARGS.getString("target")
val outputFilePath = Paths.get(ARGS.getString("json_output")) val outputFilePath = ARGS.getString("json_output")
println(s"Loading entities from ${entitiesFilePath}") println(s"Loading entities from ${entitiesFilePath}")
val entities: List[NamedEntity] = loadEntitiesOrFail(entitiesFilePath) val entities: List[NamedEntity] = loadEntitiesOrFail(entitiesFilePath)
@ -59,10 +59,11 @@ object ComputeConcepts extends MainCommand {
partitionThread.join(60000) partitionThread.join(60000)
stop.set(true) stop.set(true)
partitionThread.join() partitionThread.join()
println(s"Concept computation finished, ${(System.currentTimeMillis() - startTime)/1000}s elapsed.") println(s"Concept computation finished, ${(System.currentTimeMillis() - startTime) / 1000}s elapsed.")
Files.writeString(outputFilePath, cnnPartition.toJson) // The only way to programmatically get the output of the CNN algorithm is to parse its json
val output = cnnJsonToOutputJson(new JSONObject(new JSONTokener(cnnPartition.toJson)), entities.size)
println(s"Writing output to ${outputFilePath}")
Utils.writeJsonToFile(output, outputFilePath)
} }
private def loadEntitiesOrFail(jsonFilePath: Path): List[NamedEntity] = { private def loadEntitiesOrFail(jsonFilePath: Path): List[NamedEntity] = {
@ -80,4 +81,26 @@ object ComputeConcepts extends MainCommand {
}) })
.get .get
} }
/**
 * Builds the output JSON object from the JSON emitted by the CNN library.
 *
 * The result has three keys:
 *  - "target": the first element of the input's "target" array
 *  - "nbEntities": the `nbEntities` argument, stored as-is
 *  - "concepts": each element of the input's "conceptsOfNeighbours" array,
 *    passed through `processConcept`
 *
 * @param cnnJson    parsed JSON produced by the CNN library
 * @param nbEntities value stored under "nbEntities"
 * @return a newly created JSON object with the keys listed above
 */
private def cnnJsonToOutputJson(cnnJson: JSONObject, nbEntities: Int): JSONObject = {
  // Read the inputs in the same order as they are written below, so that a
  // malformed document fails on the same key as before.
  val firstTarget = cnnJson.getJSONArray("target").get(0)
  val rawConcepts = Utils.JSONArrayElementsAs[JSONObject](cnnJson.getJSONArray("conceptsOfNeighbours"))
  val concepts = new JSONArray(rawConcepts.map(processConcept).asJava)

  new JSONObject()
    .put("target", firstTarget)
    .put("nbEntities", nbEntities)
    .put("concepts", concepts)
}
/**
 * Converts one entry of "conceptsOfNeighbours" into the output format.
 *
 * The result has three keys:
 *  - "extensionalDistance": copied from the input as an integer
 *  - "properSize": the number of elements in the input's "answers" array
 *  - "elements": those same "answers" elements, as a JSON array
 *
 * @param concept one concept object taken from the CNN library's JSON
 * @return a newly created JSON object with the keys listed above
 */
private def processConcept(concept: JSONObject): JSONObject = {
  val distance = concept.getInt("extensionalDistance")
  val answers: Seq[JSONObject] = Utils.JSONArrayElementsAs(concept.getJSONArray("answers"))

  new JSONObject()
    .put("extensionalDistance", distance)
    .put("properSize", answers.size)
    .put("elements", new JSONArray(answers.asJava))
}
} }

View File

@ -4,12 +4,9 @@ import guessNNprepare.{NamedEntity, Utils}
import net.sourceforge.argparse4j.impl.Arguments import net.sourceforge.argparse4j.impl.Arguments
import net.sourceforge.argparse4j.inf.{ArgumentParser, Namespace} import net.sourceforge.argparse4j.inf.{ArgumentParser, Namespace}
import org.apache.jena.query.QueryExecutionFactory import org.apache.jena.query.QueryExecutionFactory
import org.apache.jena.rdf.model.{InfModel, Model, ModelFactory} import org.apache.jena.rdf.model.Model
import org.apache.jena.reasoner.ReasonerRegistry
import org.json.JSONArray import org.json.JSONArray
import java.io.{FileWriter, InputStream}
object ExtractEntities extends MainCommand { object ExtractEntities extends MainCommand {
override def description: String = "Save the list of guessable entities to a json file" override def description: String = "Save the list of guessable entities to a json file"
@ -37,10 +34,7 @@ object ExtractEntities extends MainCommand {
println(s"Writing entities to ${jsonFilePath}") println(s"Writing entities to ${jsonFilePath}")
val jsonEntities:JSONArray = new JSONArray() val jsonEntities:JSONArray = new JSONArray()
guessableEntities.foreach(e => jsonEntities.put(e.json)) guessableEntities.foreach(e => jsonEntities.put(e.json))
val writer = new FileWriter(jsonFilePath) Utils.writeJsonToFile(jsonEntities, jsonFilePath)
jsonEntities.write(writer)
writer.flush()
writer.close()
println("Done.") println("Done.")
} }