diff --git a/prepare/build.gradle b/prepare/build.gradle
index 759553c..cafc7c9 100644
--- a/prepare/build.gradle
+++ b/prepare/build.gradle
@@ -1,18 +1,28 @@
 plugins {
     id 'application'
+    id 'scala'
 }
 
+version '0.1'
+
 repositories {
     mavenCentral()
 }
 
 dependencies {
+    implementation 'org.scala-lang:scala3-library_3:3.1.2'
     implementation 'org.apache.jena:apache-jena-libs:4.4.0'
     implementation 'org.apache.logging.log4j:log4j-core:2.17.2'
     implementation 'org.slf4j:slf4j-simple:1.7.36'
 }
 
-version '0.1'
+sourceSets {
+    main {
+        java.srcDirs = ["src/main"]
+        scala.srcDirs = ["src/main"]
+        resources.srcDirs = ["src/resources"]
+    }
+}
 
 java {
     sourceCompatibility = 11
diff --git a/prepare/src/main/Main.scala b/prepare/src/main/Main.scala
new file mode 100644
index 0000000..a8ba710
--- /dev/null
+++ b/prepare/src/main/Main.scala
@@ -0,0 +1,58 @@
+import org.apache.jena.query.QueryExecutionFactory
+import org.apache.jena.rdf.model.{InfModel, Model, ModelFactory}
+import org.apache.jena.reasoner.ReasonerRegistry
+
+import java.io.InputStream
+import scala.jdk.CollectionConverters.*
+import scala.util.Using
+
+/** Loads the Mondial knowledge graph from the classpath and prints summary counts. */
+object Main {
+
+  //noinspection HttpUrlsUsage
+  private val MondialPrefix = "http://www.semwebtech.org/mondial/10/meta#"
+
+  /**
+   * Reads the bundled dataset into a model backed by Jena's transitive reasoner,
+   * then prints the number of statements and the number of `Country` entities.
+   *
+   * An explicit `main` replaces `extends App`, whose body would run inside the
+   * object's initializer.
+   */
+  def main(args: Array[String]): Unit = {
+    println("Loading knowledge graph...")
+    val kg: InfModel = ModelFactory.createInfModel(ReasonerRegistry.getTransitiveReasoner, ModelFactory.createDefaultModel)
+    val mondialDataset: InputStream = Option(ClassLoader.getSystemResourceAsStream("mondial_2022_04_04.n3"))
+      .getOrElse {
+        System.err.println("Impossible to open dataset")
+        // sys.exit has type Nothing, so no dummy null is needed to satisfy the type checker.
+        sys.exit(1)
+      }
+    // Close the classpath stream once the model has been populated.
+    Using.resource(mondialDataset)(stream => kg.read(stream, null, "TTL"))
+    printf("Loaded %s triples\n", kg.listStatements().toList.size)
+    val countries = getMondialNamedEntities(kg, "Country")
+    printf("There are %d countries in the dataset\n", countries.size)
+  }
+
+  /**
+   * Returns every entity of the given Mondial type together with its name.
+   *
+   * @param kg           model to query
+   * @param prefixedType local name of the type in the Mondial meta namespace, e.g. `"Country"`
+   * @return one [[NamedEntity]] per `(entity, name)` solution, in result-set order
+   */
+  def getMondialNamedEntities(kg: Model, prefixedType: String): List[NamedEntity] = {
+    val fullType = MondialPrefix + prefixedType
+    val nameProperty = MondialPrefix + "name"
+    val queryString = s"SELECT ?entity ?name WHERE { ?entity a <$fullType> ; <$nameProperty> ?name. }"
+    // The execution is released even if the query fails; the iterator is drained
+    // into a List before that happens because results cannot be read after close.
+    Using.resource(QueryExecutionFactory.create(queryString, kg)) { execution =>
+      execution.execSelect().asScala
+        .map(solution => new NamedEntity(solution.getLiteral("?name").getString, solution.getResource("?entity")))
+        .toList
+    }
+  }
+
+}
diff --git a/prepare/src/main/NamedEntity.scala b/prepare/src/main/NamedEntity.scala
new file mode 100644
index 0000000..dbdb7cc
--- /dev/null
+++ b/prepare/src/main/NamedEntity.scala
@@ -0,0 +1,9 @@
+import org.apache.jena.rdf.model.Resource
+
+/**
+ * A resource from the knowledge graph paired with its name.
+ *
+ * @param Name   name literal of the entity
+ * @param entity the resource the name belongs to
+ */
+class NamedEntity(val Name: String, val entity: Resource)
diff --git a/prepare/src/main/java/Main.java b/prepare/src/main/java/Main.java
deleted file mode 100644
index a15062e..0000000
--- a/prepare/src/main/java/Main.java
+++ /dev/null
@@ -1,60 +0,0 @@
-import org.apache.jena.query.QueryExecution;
-import org.apache.jena.query.QueryExecutionFactory;
-import org.apache.jena.query.QuerySolution;
-import org.apache.jena.query.ResultSet;
-import org.apache.jena.rdf.model.InfModel;
-import org.apache.jena.rdf.model.Model;
-import org.apache.jena.rdf.model.ModelFactory;
-import org.apache.jena.rdf.model.Resource;
-import org.apache.jena.reasoner.ReasonerRegistry;
-
-import java.io.InputStream;
-import java.util.HashMap;
-import java.util.Map;
-
-public final class Main
-{
-    public static void main(String[] args)
-    {
-        System.out.println("Loading knowledge graph...");
-        InfModel kg = ModelFactory.createInfModel(ReasonerRegistry.getTransitiveReasoner(), ModelFactory.createDefaultModel());
-        final InputStream mondialDataset = ClassLoader.getSystemResourceAsStream("mondial_2022_04_04.n3");
-        if (mondialDataset == null)
-        {
-            System.err.println("Impossible to open mondial dataset");
-            System.exit(1);
-        }
-        kg.read(mondialDataset, null, "TTL");
-        System.out.printf("Loaded %s triples\n", kg.listStatements().toList().size());
-
-        {
-            Map countries = mondialGetEntitiesWithName(kg, "Country");
-            System.out.printf("There are %d countries in the dataset\n", countries.size());
-        }
-
-    }
-
-    private static Map mondialGetEntitiesWithName(Model kg, String prefixedType)
-    {
-        Map result = new HashMap<>();
-
-        String queryString = String.format("PREFIX m: SELECT ?entity ?name WHERE { ?entity a m:%s ; m:name ?name. }", prefixedType);
-        try (QueryExecution execution = QueryExecutionFactory.create(queryString, kg))
-        {
-            final ResultSet resultSet = execution.execSelect();
-            while (resultSet.hasNext())
-            {
-                QuerySolution solution = resultSet.next();
-                Resource entity = solution.getResource("?entity");
-                String name = solution.getLiteral("?name").getString();
-                if (result.containsKey(name))
-                {
-                    System.err.printf("Error: multiple entities with the name %s\n", name);
-                    System.exit(1);
-                }
-                result.put(name, entity);
-            }
-        }
-        return result;
-    }
-}
diff --git a/prepare/src/main/resources/mondial_2022_04_04.n3 b/prepare/src/resources/mondial_2022_04_04.n3
similarity index 100%
rename from prepare/src/main/resources/mondial_2022_04_04.n3
rename to prepare/src/resources/mondial_2022_04_04.n3