75 lines
2.7 KiB
Scala
75 lines
2.7 KiB
Scala
package guessNNprepare.mains
|
|
|
|
import conceptualKNN.utils.Table
|
|
import conceptualKNN.{ConceptualKNNModel, Partition}
|
|
import guessNNprepare.{NamedEntity, Utils}
|
|
import net.sourceforge.argparse4j.impl.Arguments
|
|
import net.sourceforge.argparse4j.inf.{ArgumentParser, Namespace}
|
|
import org.apache.jena.sparql.core.Var
|
|
import org.apache.jena.sparql.engine.binding.BindingFactory
|
|
import org.json.{JSONArray, JSONObject, JSONTokener}
|
|
|
|
import java.nio.file.{Files, Path, Paths}
|
|
import java.util.concurrent.atomic.AtomicBoolean
|
|
import scala.jdk.CollectionConverters.SeqHasAsJava
|
|
import scala.util.Try
|
|
|
|
/**
 * Command-line entry point that loads a list of guessable entities and a data graph,
 * then runs the `conceptualKNN` partitioning for one target entity and prints the
 * number of concepts that were computed.
 */
object ComputeConcepts extends MainCommand {

  override def description: String = "Compute the CNN for a given goal entity"

  /**
   * Registers the positional arguments of this command on the given parser.
   *
   * @param parser argparse4j parser to configure; receives the command description,
   *               a readable-file argument `guessable_entities` and a `target` argument
   */
  override def addCliArgs(parser: ArgumentParser): Unit = {
    parser.description(description)

    parser
      .addArgument("guessable_entities")
      .`type`(Arguments.fileType().verifyCanRead())
      .help("JSON file containing all guessable entities")
    parser
      .addArgument("target")
      .help("URI of the entity that should be guessed")
  }

  /**
   * Runs the command: loads the entities, checks that the target is one of them, loads
   * the data graph, fills a single-column table with the entities and launches the
   * partitioning.
   *
   * Terminates the JVM with exit code 1 when the entity file cannot be parsed or when
   * the target URI does not match any loaded entity.
   *
   * @param ARGS parsed command-line arguments (`guessable_entities` and `target`)
   */
  override def execute(ARGS: Namespace): Unit = {
    // CLI args
    val entitiesFilePath = Paths.get(ARGS.getString("guessable_entities"))
    val guessTarget = ARGS.getString("target")

    println(s"Loading entities from ${entitiesFilePath}")
    val entities: List[NamedEntity] = loadEntitiesOrFail(entitiesFilePath)
    println(s"Loaded ${entities.length} entities")

    val targetIsGuessable = entities.exists(_.rdfResource.getURI == guessTarget)
    if (!targetIsGuessable) {
      System.err.println("Error: target entity is not part of the guessable entities")
      sys.exit(1)
    }

    println("Loading data graph...")
    // `.get` throws if the dataset could not be loaded.
    val dataGraph = Utils.loadMondialDataset().get
    // NOTE(review): assumes dataGraph is a Jena Model, whose size() gives the statement
    // count without materializing every statement into a list first — confirm.
    println(s"Loaded ${dataGraph.size()} triples")

    // One-column table (?e) holding one row per guessable entity.
    val entityVar = Var.alloc("e")
    val entityTable = new Table(List(entityVar).asJava)
    entities.foreach { entity =>
      // NOTE(review): the entity is looked up by URI. On a Jena Model, the overload
      // createResource(Resource) would instead create a fresh anonymous resource typed
      // by its argument (and add an rdf:type triple to the graph), which does not look
      // like the intent here — confirm against the type of `rdfResource`.
      val node = dataGraph.createResource(entity.rdfResource.getURI).asNode()
      entityTable.addBinding(BindingFactory.binding(entityVar, node))
    }

    // NOTE(review): the meaning of the trailing `0` and of the AtomicBoolean passed to
    // fullPartitioning (presumably a cancellation flag) is defined by conceptualKNN —
    // verify against that library.
    val cnnPartition =
      new Partition(new ConceptualKNNModel(dataGraph), List(guessTarget).asJava, entityTable, 0)
    println("Starting concept computation")
    cnnPartition.fullPartitioning(new AtomicBoolean(false))
    println(s"Computation finished, ${cnnPartition.getNbConcepts} concepts computed")
  }

  /**
   * Reads a JSON array of objects from the given file and converts each element with
   * `NamedEntity.apply`.
   *
   * On any failure (unreadable file, malformed JSON, or an element that cannot be
   * converted) the error is printed to stderr and the JVM exits with code 1.
   *
   * @param jsonFilePath path of the JSON file to read
   * @return the entities described by the file
   */
  private def loadEntitiesOrFail(jsonFilePath: Path): List[NamedEntity] = {
    import scala.util.Using

    // Using closes the stream once the JSON array has been fully parsed and captures
    // any exception (including a failure to open the file) as a Failure.
    Using(Files.newInputStream(jsonFilePath))(in => new JSONArray(new JSONTokener(in)))
      .map(arr =>
        Utils.JSONArrayElementsAs[JSONObject](arr)
          .map(NamedEntity.apply)
          .toList
      )
      .flatMap(Utils.flattenTryList)
      .recover { case err =>
        System.err.println(s"Error when parsing entities: ${err}")
        sys.exit(1)
      }
      .get
  }
}
|