Prepare: added the Concepts of Neighbours library and tried to compute concepts, without success

This commit is contained in:
Francesco 2022-04-16 19:11:59 +02:00
parent 0cbc2b319c
commit ef3d96d024
5 changed files with 43 additions and 9 deletions

View File

@ -16,6 +16,7 @@ dependencies {
implementation 'org.slf4j:slf4j-simple:1.7.36' implementation 'org.slf4j:slf4j-simple:1.7.36'
implementation 'net.sourceforge.argparse4j:argparse4j:0.9.0' implementation 'net.sourceforge.argparse4j:argparse4j:0.9.0'
implementation 'org.json:json:20220320' implementation 'org.json:json:20220320'
implementation files("lib/conceptualKNN.v1.6.jar")
} }
sourceSets { sourceSets {

Binary file not shown.

View File

@ -1,5 +1,7 @@
package guessNNprepare package guessNNprepare
import org.apache.jena.rdf.model.{InfModel, ModelFactory}
import org.apache.jena.reasoner.ReasonerRegistry
import org.json.JSONArray import org.json.JSONArray
import java.util.stream.StreamSupport import java.util.stream.StreamSupport
@ -8,6 +10,16 @@ import scala.util.{Failure, Success, Try}
object Utils { object Utils {
/**
 * Loads the Mondial dataset bundled on the classpath (`mondial_2022_04_04.n3`) into an
 * inference model backed by Jena's transitive reasoner, reading it as Turtle ("TTL").
 *
 * @return `Success` with the populated model; `Failure` if the resource cannot be found on
 *         the classpath or if reading/parsing the stream throws a non-fatal exception
 */
def loadMondialDataset(): Try[InfModel] = {
  // Local import so this block does not depend on the file-level import list.
  import scala.util.Using

  Option(ClassLoader.getSystemResourceAsStream("mondial_2022_04_04.n3"))
    .fold[Try[InfModel]](
      Failure(new RuntimeException("Impossible to open mondial dataset (should not happen)"))
    ) { stream =>
      // Using closes the stream in every case and turns an exception thrown while
      // reading into a Failure, so the declared Try return type is actually honoured.
      Using(stream) { in =>
        val kg: InfModel = ModelFactory.createInfModel(ReasonerRegistry.getTransitiveReasoner, ModelFactory.createDefaultModel)
        kg.read(in, null, "TTL")
        kg
      }
    }
}
def JSONArrayElementsAs[T](array: JSONArray): Seq[T] = { def JSONArrayElementsAs[T](array: JSONArray): Seq[T] = {
StreamSupport.stream(array.spliterator(), false).toScala(LazyList) StreamSupport.stream(array.spliterator(), false).toScala(LazyList)
.map(obj => obj.asInstanceOf[T]) .map(obj => obj.asInstanceOf[T])

View File

@ -1,11 +1,17 @@
package guessNNprepare.mains package guessNNprepare.mains
import conceptualKNN.utils.Table
import conceptualKNN.{ConceptualKNNModel, Partition}
import guessNNprepare.{NamedEntity, Utils} import guessNNprepare.{NamedEntity, Utils}
import net.sourceforge.argparse4j.impl.Arguments import net.sourceforge.argparse4j.impl.Arguments
import net.sourceforge.argparse4j.inf.{ArgumentParser, Namespace} import net.sourceforge.argparse4j.inf.{ArgumentParser, Namespace}
import org.apache.jena.sparql.core.Var
import org.apache.jena.sparql.engine.binding.BindingFactory
import org.json.{JSONArray, JSONObject, JSONTokener} import org.json.{JSONArray, JSONObject, JSONTokener}
import java.nio.file.{Files, Path, Paths} import java.nio.file.{Files, Path, Paths}
import java.util.concurrent.atomic.AtomicBoolean
import scala.jdk.CollectionConverters.SeqHasAsJava
import scala.util.Try import scala.util.Try
object ComputeConcepts extends MainCommand { object ComputeConcepts extends MainCommand {
@ -15,15 +21,37 @@ object ComputeConcepts extends MainCommand {
parser.description(description) parser.description(description)
parser.addArgument("guessable_entities").`type`(Arguments.fileType().verifyCanRead()).help("JSON file containing all guessable entities") parser.addArgument("guessable_entities").`type`(Arguments.fileType().verifyCanRead()).help("JSON file containing all guessable entities")
parser.addArgument("target").help("URI of the entity that should be guessed")
} }
/**
 * Runs the concept computation for one target entity.
 *
 * Loads the guessable entities from the JSON file given on the command line, checks that
 * the target URI belongs to them, loads the Mondial data graph, builds a one-column table
 * holding every guessable entity, and runs the full partitioning for the target.
 *
 * @param ARGS parsed command-line arguments; reads `guessable_entities` and `target`
 */
override def execute(ARGS: Namespace): Unit = {
  // CLI args
  val entitiesFilePath = Paths.get(ARGS.getString("guessable_entities"))
  val guessTarget = ARGS.getString("target")

  println(s"Loading entities from ${entitiesFilePath}")
  val entities: List[NamedEntity] = loadEntitiesOrFail(entitiesFilePath)
  println(s"Loaded ${entities.length} entities")
  if (!entities.exists(e => e.rdfResource.getURI == guessTarget)) {
    System.err.println("Error: target entities is not part of the guessable entities")
    System.exit(1)
  }

  println("Loading data graph...")
  val dataGraph = Utils.loadMondialDataset().get
  println(s"Loaded ${dataGraph.listStatements().toList.size()} triples")

  val entityVar = Var.alloc("e")
  val entityTable = new Table(List(entityVar).asJava)
  entities
    // Bind the entity's own node. Passing the resource to Model.createResource would pick
    // the createResource(Resource type) overload, which creates a NEW blank node typed
    // with the entity: the table would then hold anonymous nodes instead of the entities,
    // and the data graph would gain spurious rdf:type triples.
    // NOTE(review): assumes rdfResource is a Jena Resource — confirm against NamedEntity.
    .map(entity => entity.rdfResource.asNode())
    .map(node => BindingFactory.binding(entityVar, node))
    .foreach(binding => entityTable.addBinding(binding))

  val cnnPartition = new Partition(new ConceptualKNNModel(dataGraph), List(guessTarget).asJava, entityTable, 0)
  println("Starting concept computation")
  cnnPartition.fullPartitioning(new AtomicBoolean(false))
  println(s"Computation finished, ${cnnPartition.getNbConcepts} concepts computed")
}

View File

@ -1,6 +1,6 @@
package guessNNprepare.mains package guessNNprepare.mains
import guessNNprepare.NamedEntity import guessNNprepare.{NamedEntity, Utils}
import net.sourceforge.argparse4j.impl.Arguments import net.sourceforge.argparse4j.impl.Arguments
import net.sourceforge.argparse4j.inf.{ArgumentParser, Namespace} import net.sourceforge.argparse4j.inf.{ArgumentParser, Namespace}
import org.apache.jena.query.QueryExecutionFactory import org.apache.jena.query.QueryExecutionFactory
@ -24,14 +24,7 @@ object ExtractEntities extends MainCommand {
val jsonFilePath = ARGS.getString("json_file") val jsonFilePath = ARGS.getString("json_file")
println("Loading knowledge graph...") println("Loading knowledge graph...")
val kg: InfModel = ModelFactory.createInfModel(ReasonerRegistry.getTransitiveReasoner, ModelFactory.createDefaultModel) val kg = Utils.loadMondialDataset().get
val mondialDataset: InputStream = Option(ClassLoader.getSystemResourceAsStream("mondial_2022_04_04.n3"))
.getOrElse({
System.err.println("Impossible to open dataset")
System.exit(1)
null
})
kg.read(mondialDataset, null, "TTL");
printf("Loaded %s triples\n", kg.listStatements().toList.size) printf("Loaded %s triples\n", kg.listStatements().toList.size)
val guessableEntities = (List[NamedEntity]() ++ val guessableEntities = (List[NamedEntity]() ++
getMondialNamedEntities(kg, "Country") ++ getMondialNamedEntities(kg, "Country") ++