diff --git a/prepare/build.gradle b/prepare/build.gradle
index 322fff3..c763ee2 100644
--- a/prepare/build.gradle
+++ b/prepare/build.gradle
@@ -16,6 +16,7 @@ dependencies {
     implementation 'org.slf4j:slf4j-simple:1.7.36'
     implementation 'net.sourceforge.argparse4j:argparse4j:0.9.0'
     implementation 'org.json:json:20220320'
+    implementation files("lib/conceptualKNN.v1.6.jar")
 }
 
 sourceSets {
diff --git a/prepare/lib/conceptualKNN.v1.6.jar b/prepare/lib/conceptualKNN.v1.6.jar
new file mode 100644
index 0000000..d271b9d
Binary files /dev/null and b/prepare/lib/conceptualKNN.v1.6.jar differ
diff --git a/prepare/src/main/guessNNprepare/Utils.scala b/prepare/src/main/guessNNprepare/Utils.scala
index e4a1371..37d4d21 100644
--- a/prepare/src/main/guessNNprepare/Utils.scala
+++ b/prepare/src/main/guessNNprepare/Utils.scala
@@ -1,5 +1,7 @@
 package guessNNprepare
 
+import org.apache.jena.rdf.model.{InfModel, ModelFactory}
+import org.apache.jena.reasoner.ReasonerRegistry
 import org.json.JSONArray
 
 import java.util.stream.StreamSupport
@@ -8,6 +10,22 @@ import scala.util.{Failure, Success, Try}
 
 object Utils {
 
+  /** Loads the Mondial dataset bundled on the classpath into a model using Jena's transitive reasoner.
+    *
+    * @return the populated model, or a Failure if the resource is missing or reading it throws
+    */
+  def loadMondialDataset(): Try[InfModel] = {
+    // Using closes the stream in every case and turns a read/parse exception into a Failure
+    Option(ClassLoader.getSystemResourceAsStream("mondial_2022_04_04.n3"))
+      .toRight(new RuntimeException("Impossible to open mondial dataset (should not happen)"))
+      .toTry
+      .flatMap(stream => scala.util.Using(stream) { in =>
+        val kg: InfModel = ModelFactory.createInfModel(ReasonerRegistry.getTransitiveReasoner, ModelFactory.createDefaultModel)
+        kg.read(in, null, "TTL")
+        kg
+      })
+  }
+
   def JSONArrayElementsAs[T](array: JSONArray): Seq[T] = {
     StreamSupport.stream(array.spliterator(), false).toScala(LazyList)
       .map(obj => obj.asInstanceOf[T])
diff --git a/prepare/src/main/guessNNprepare/mains/ComputeConcepts.scala b/prepare/src/main/guessNNprepare/mains/ComputeConcepts.scala
index 818ec22..f299d73 100644
--- a/prepare/src/main/guessNNprepare/mains/ComputeConcepts.scala
+++ b/prepare/src/main/guessNNprepare/mains/ComputeConcepts.scala
@@ -1,11 +1,17 @@
 package guessNNprepare.mains
 
+import conceptualKNN.utils.Table
+import conceptualKNN.{ConceptualKNNModel, Partition}
 import guessNNprepare.{NamedEntity, Utils}
 import net.sourceforge.argparse4j.impl.Arguments
 import net.sourceforge.argparse4j.inf.{ArgumentParser, Namespace}
+import org.apache.jena.sparql.core.Var
+import org.apache.jena.sparql.engine.binding.BindingFactory
 import org.json.{JSONArray, JSONObject, JSONTokener}
 
 import java.nio.file.{Files, Path, Paths}
+import java.util.concurrent.atomic.AtomicBoolean
+import scala.jdk.CollectionConverters.SeqHasAsJava
 import scala.util.Try
 
 object ComputeConcepts extends MainCommand {
@@ -15,15 +21,41 @@ object ComputeConcepts extends MainCommand {
     parser.description(description)
 
     parser.addArgument("guessable_entities").`type`(Arguments.fileType().verifyCanRead()).help("JSON file containing all guessable entities")
+    parser.addArgument("target").help("URI of the entity that should be guessed")
   }
 
   override def execute(ARGS: Namespace): Unit = {
     // CLI args
     val entitiesFilePath = Paths.get(ARGS.getString("guessable_entities"))
+    val guessTarget = ARGS.getString("target")
 
     println(s"Loading entities from ${entitiesFilePath}")
     val entities: List[NamedEntity] = loadEntitiesOrFail(entitiesFilePath)
     println(s"Loaded ${entities.length} entities")
+    // Abort early when the requested target is not among the guessable entities
+    if (!entities.exists(e => e.rdfResource.getURI == guessTarget)) {
+      System.err.println("Error: target entities is not part of the guessable entities")
+      System.exit(1)
+    }
+    println("Loading data graph...")
+    val dataGraph = Utils.loadMondialDataset().get
+    println(s"Loaded ${dataGraph.listStatements().toList.size()} triples")
+
+    val entityVar = Var.alloc("e")
+    val entityTable = new Table(List(entityVar).asJava)
+    // Bind each guessable entity to ?e. The node is looked up by URI because
+    // Model.createResource(Resource) creates a new anonymous resource typed by its argument
+    // instead of returning the entity itself (assumes rdfResource is a Jena Resource).
+    entities
+      .map(entity => dataGraph.createResource(entity.rdfResource.getURI))
+      .map(resource => resource.asNode())
+      .map(node => BindingFactory.binding(entityVar, node))
+      .foreach(binding => entityTable.addBinding(binding))
+
+    val cnnPartition = new Partition(new ConceptualKNNModel(dataGraph), List(guessTarget).asJava, entityTable, 0)
+    println("Starting concept computation")
+    cnnPartition.fullPartitioning(new AtomicBoolean(false))
+    println(s"Computation finished, ${cnnPartition.getNbConcepts} concepts computed")
   }
 
 
diff --git a/prepare/src/main/guessNNprepare/mains/ExtractEntities.scala b/prepare/src/main/guessNNprepare/mains/ExtractEntities.scala
index 61f8f73..ab9bc15 100644
--- a/prepare/src/main/guessNNprepare/mains/ExtractEntities.scala
+++ b/prepare/src/main/guessNNprepare/mains/ExtractEntities.scala
@@ -1,6 +1,6 @@
 package guessNNprepare.mains
 
-import guessNNprepare.NamedEntity
+import guessNNprepare.{NamedEntity, Utils}
 import net.sourceforge.argparse4j.impl.Arguments
 import net.sourceforge.argparse4j.inf.{ArgumentParser, Namespace}
 import org.apache.jena.query.QueryExecutionFactory
@@ -24,14 +24,7 @@ object ExtractEntities extends MainCommand {
     val jsonFilePath = ARGS.getString("json_file")
 
     println("Loading knowledge graph...")
-    val kg: InfModel = ModelFactory.createInfModel(ReasonerRegistry.getTransitiveReasoner, ModelFactory.createDefaultModel)
-    val mondialDataset: InputStream = Option(ClassLoader.getSystemResourceAsStream("mondial_2022_04_04.n3"))
-      .getOrElse({
-        System.err.println("Impossible to open dataset")
-        System.exit(1)
-        null
-      })
-    kg.read(mondialDataset, null, "TTL");
+    val kg = Utils.loadMondialDataset().get
     printf("Loaded %s triples\n", kg.listStatements().toList.size)
     val guessableEntities = (List[NamedEntity]() ++
       getMondialNamedEntities(kg, "Country") ++