package com.monead.semantic.education;

/**
 * RdbToRdf - A proof-of-concept (POC) for converting relational
 * data into RDF triples using inferencing
 *
 * This program uses Jena and Pellet to allow for the creation of
 * a set of RDF triples based on data from a relational database
 * query.
 *
 * NOTE: THIS PROGRAM IS SOLELY FOR USE AS A PROOF-OF-CONCEPT. IT
 * PLACES A DATABASE PASSWORD IN PLAINTEXT WITHIN A CONFIGURATION
 * FILE. THIS IS AN INSECURE PRACTICE THAT SHOULD NOT BE USED FOR
 * AN APPLICATION THAT ACCESSES PRODUCTION DATA.
 *
 * Copyright (C) 2010 David S. Read
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * For information on Jena: http://jena.sourceforge.net/
 * For information on Pellet: http://clarkparsia.com/pellet
 */

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;

import javax.management.RuntimeErrorException;

import org.mindswap.pellet.jena.PelletReasonerFactory;

import com.hp.hpl.jena.ontology.Individual;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.ontology.OntModelSpec;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.ModelMaker;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.reasoner.Reasoner;
import com.hp.hpl.jena.util.iterator.ExtendedIterator;

/**
 * Loads an ontology file into a Jena model, runs a configured SQL query,
 * asserts one triple per non-PK column of every returned row and writes the
 * resulting model to an output file in the same format the ontology was read
 * in.
 */
public class RdbToRdf implements Runnable {
    /**
     * The version identifier
     */
    public final static String VERSION = "1.0";

    /**
     * Properties file
     */
    private final static String PROPERTIES_FILE_NAME = "RdbToRdf.properties";

    /**
     * Default namespace for the data loaded from the RDB
     */
    private final static String DEFAULT_NAMESPACE = "http://monead.com/semantic/education#";

    /**
     * Default class for all the exported records
     */
    private final static String DEFAULT_DATA_CLASS = "http://monead.com/semantic/education#RdbData";

    /**
     * Default value to place in front of PK value when used as an instance
     * name.
     *
     * This is done in case the value is numeric.
     *
     * Error results from having an instance name beginning with a digit since
     * URIs must start with an alpha character.
     */
    private final static String DEFAULT_INSTANCE_NAME_PREFIX = "PK_";

    /**
     * Data value for PK value if the PK is null - shouldn't happen with true
     * PK columns
     */
    private final static String DEFAULT_NULL_PK_VALUE_FLAG = "NULL_PK";

    /**
     * Data value to replace a null column value
     */
    private final static String DEFAULT_NULL_VALUE_FLAG = "NULL";

    /**
     * The namespace to use for the generated resources and predicates
     */
    private String rdfNamespace;

    /**
     * The class of the data instances generated
     */
    private String rdfDataClass;

    /**
     * The prefix for instance names (since PKs are often numbers)
     */
    private String rdfInstanceNamePrefix;

    /**
     * The value to insert if a PK column has a null
     *
     * This may happen if the "PK" column is a surrogate
     * key.
     */
    private String dataNullPkValueFlag;

    /**
     * The value to insert if a column, other than the chosen
     * PK column, contains a null.
     */
    private String dataNullValueFlag;

    /**
     * The set of formats that can be output. These are defined by Jena
     */
    private final static String[] FORMATS = { "Turtle", "N-Triples", "RDF/XML", "N3" };

    /**
     * The reasoning level to use
     *
     * TODO allow this to be controlled from command line or configuration
     */
    private static final String reasoningLevel = "owl";

    /**
     * The output format for the triples
     *
     * This is set to the format that was successfully used to read the
     * input file.
     *
     * TODO allow control from command line or configuration
     */
    private String outputFormat;

    /**
     * The reasoning levels available.
     */
    protected final static String[] REASONING_LEVELS = { "none", "rdfs", "owl" };

    /**
     * A default file to read - in case one is not supplied on the command
     * line
     */
    private final static String DEFAULT_INPUT_FILE = "DSRRdbToRdfTestOntology.turtle";

    /**
     * Constant used if a value cannot be found in an array
     */
    private final static int UNKNOWN = -1;

    /**
     * The SQL query whose result rows are converted into triples
     */
    private String dbSql;

    /**
     * The label (name) of the PK column from the SQL statement
     *
     * The values in this column will represent the subjects of
     * the asserted triples
     */
    private String dbPkColumnLabel;

    /**
     * The database driver class
     */
    private String dbDriverClass;

    /**
     * The connection string for the database
     */
    private String dbConnectionURL;

    /**
     * The user id to use for the database connection
     */
    private String dbUserId;

    /**
     * The password to use for the database connection
     */
    private String dbPassword;

    /**
     * The name (and path if necessary) to the ontology being loaded
     */
    private String inputFileName;

    /**
     * The name (and path if necessary) to the output file for the output
     * triples
     */
    private String outputFileName;

    /**
     * The loaded ontology
     */
    private OntModel ontModel;

    /**
     * Constructor - records the input and output file paths and loads the
     * configuration from the properties file.
     *
     * @param inputFileName
     *            The name (and optional path) to an ontology
     * @param outputFileName
     *            The name (and optional path) for the output
     *
     * @throws RuntimeException
     *             if the properties file cannot be read or a required
     *             property is missing
     */
    public RdbToRdf(String inputFileName, String outputFileName) {
        // Assign directly rather than through the public setters so that a
        // subclass override cannot run against a half-constructed instance.
        this.inputFileName = inputFileName;
        this.outputFileName = outputFileName;
        loadProperties();
    }

    /**
     * Load the properties to configure the program.
     *
     * At a minimum the properties file must contain all
     * of the database (db.*) property values.
     *
     * @throws RuntimeException
     *             if the file cannot be read or a required property is
     *             missing or blank
     */
    private void loadProperties() {
        Properties properties = new Properties();
        FileReader reader = null;

        try {
            reader = new FileReader(PROPERTIES_FILE_NAME);
            properties.load(reader);
        } catch (IOException ioExc) {
            System.out.println("Cannot read properties file: "
                    + PROPERTIES_FILE_NAME);
            // getAbsolutePath() so the user sees where the file was expected
            System.out.println("Full path: "
                    + new File(PROPERTIES_FILE_NAME).getAbsolutePath());
            ioExc.printStackTrace();
            throw new RuntimeException("Cannot read properties file", ioExc);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException closeExc) {
                    // The content has already been read (or the read has
                    // already failed); a close failure is only reported.
                    System.out.println("Failed to close properties file: "
                            + PROPERTIES_FILE_NAME);
                }
            }
        }

        dbDriverClass = mustExist(properties, "db.driverclass");
        dbConnectionURL = mustExist(properties, "db.connectionurl");
        dbUserId = mustExist(properties, "db.userid");
        dbPassword = mustExist(properties, "db.password");
        dbSql = mustExist(properties, "db.sql");
        dbPkColumnLabel = mustExist(properties, "db.pkcolumnlabel");

        rdfDataClass = properties.getProperty("rdf.dataclass",
                DEFAULT_DATA_CLASS);
        rdfInstanceNamePrefix = properties.getProperty(
                "rdf.instancenameprefix", DEFAULT_INSTANCE_NAME_PREFIX);
        rdfNamespace = properties.getProperty("rdf.namespace",
                DEFAULT_NAMESPACE);

        dataNullPkValueFlag = properties.getProperty("data.defaultnullpkflag",
                DEFAULT_NULL_PK_VALUE_FLAG);
        dataNullValueFlag = properties.getProperty("data.defaultnullflag",
                DEFAULT_NULL_VALUE_FLAG);

        reportConfig();
    }

    /**
     * Display the configuration information based on the command
     * line, loaded properties and accepted default values.
     *
     * The database password and SQL text are not printed.
     */
    private void reportConfig() {
        System.out.println("Loaded Configuration");
        System.out.println(" Input File: " + inputFileName);
        System.out.println(" Output File: " + outputFileName);
        System.out.println();
        System.out.println(" Driver Class: " + dbDriverClass);
        System.out.println(" Connection URL: " + dbConnectionURL);
        System.out.println(" User Id: " + dbUserId);
        System.out.println(" PK Column Label: " + dbPkColumnLabel);
        System.out.println();
        System.out.println(" Data Class: " + rdfDataClass);
        System.out.println(" Instance Name Prefix: " + rdfInstanceNamePrefix);
        System.out.println(" Namespace: " + rdfNamespace);
        System.out.println();
        System.out.println(" Null PK Value Flag: " + dataNullPkValueFlag);
        System.out.println(" Null Value Flag: " + dataNullValueFlag);
        System.out.println();
    }

    /**
     * Retrieve a property from a Properties instance. The property
     * must exist and have a non-empty value (e.g. it is required).
     * If the property does not exist, or is blank, an exception
     * will be thrown.
     *
     * @param properties The properties instance
     * @param key the key whose value is being retrieved
     *
     * @return The value associated with the supplied key (returned
     *         unmodified, i.e. not trimmed)
     *
     * @throws RuntimeException if the property is missing or blank
     */
    private static String mustExist(Properties properties, String key) {
        String value = properties.getProperty(key);

        // A value consisting only of whitespace is treated as missing
        if (value == null || value.trim().length() == 0) {
            throw new RuntimeException("The property " + key
                    + " is required in the properties file ("
                    + PROPERTIES_FILE_NAME + ")");
        }

        return value;
    }

    /**
     * Perform the steps to load the ontology, add the triples derived from
     * the relational query and write the resulting model to the output file
     */
    public void run() {
        System.out.println("Load model with reasoner: " + reasoningLevel);
        loadModel(reasoningLevel);
        loadModelFromRdb();
        storeModel();
    }

    /**
     * Run the configured SQL query and add its rows to the model. The PK
     * column of each row becomes an instance of the data class; every other
     * column becomes a literal-valued statement about that instance, with
     * the column label as the predicate name.
     *
     * @throws IllegalStateException
     *             if the driver class cannot be found or a database error
     *             occurs
     */
    private void loadModelFromRdb() throws IllegalStateException {
        Connection connection = null;
        java.sql.Statement statement = null;
        ResultSet resultSet = null;

        // Create the class for all the exported data
        ontModel.createClass(rdfDataClass);

        // Access the database, run the query and create the triples
        try {
            Class.forName(dbDriverClass);
            connection = DriverManager.getConnection(dbConnectionURL,
                    dbUserId, dbPassword);
            statement = connection.createStatement();
            resultSet = statement.executeQuery(dbSql);

            ResultSetMetaData resultSetMetaData = resultSet.getMetaData();
            int numColumns = resultSetMetaData.getColumnCount();

            while (resultSet.next()) {
                // Read the PK once per row instead of once per column
                String pkValue = resultSet.getString(dbPkColumnLabel);
                System.out.println("Row PK: " + dbPkColumnLabel + "="
                        + pkValue);

                for (int col = 1; col <= numColumns; ++col) {
                    // The configured PK is a column *label*, so compare with
                    // the label (not the underlying column name) or an
                    // aliased PK column is never recognised. Compared
                    // ignoring case, assuming the driver resolves the label
                    // case-insensitively for getString(label) above.
                    String columnLabel = resultSetMetaData.getColumnLabel(col);

                    if (columnLabel.equalsIgnoreCase(dbPkColumnLabel)) {
                        addInstance(pkValue);
                    } else {
                        addStatement(pkValue, columnLabel,
                                resultSet.getString(col));
                    }
                }
            }
        } catch (ClassNotFoundException cnfExc) {
            System.out
                    .println("Could not find the db driver class on the classpath: "
                            + dbDriverClass);
            cnfExc.printStackTrace();
            throw new IllegalStateException(
                    "DB Driver class not found on classpath", cnfExc);
        } catch (SQLException sqlExc) {
            System.out.println("Error accessing the database ("
                    + dbConnectionURL + "): " + sqlExc.getMessage());
            sqlExc.printStackTrace();
            throw new IllegalStateException("Error accessing the database",
                    sqlExc);
        } finally {
            // Best-effort cleanup: a failure to close is reported but must
            // not mask the outcome of the query itself.
            if (resultSet != null) {
                try {
                    resultSet.close();
                } catch (Throwable throwable) {
                    System.out.println("Failed to close result set");
                }
            }

            if (statement != null) {
                try {
                    statement.close();
                } catch (Throwable throwable) {
                    System.out.println("Failed to close statement");
                }
            }

            if (connection != null) {
                try {
                    connection.close();
                } catch (Throwable throwable) {
                    System.out.println("Failed to close connection");
                }
            }
        }
    }

    /**
     * Create (or look up) the resource that represents a row, named from the
     * namespace, the instance name prefix and the URI-safe PK value.
     *
     * @param subject The non-null PK value of the row
     *
     * @return The resource for the row
     */
    private Resource createInstanceResource(String subject) {
        return ontModel.createResource(rdfNamespace + rdfInstanceNamePrefix
                + makeSafeURIValue(subject));
    }

    /**
     * Add the PK value as a new instance of the data class. The name is
     * prepended with the configured instance name prefix to prevent issues
     * with values that start with a digit.
     *
     * This method also adds an RDF label containing the actual value
     *
     * @param subject The subject which will be a class instance; a null is
     *            replaced by the configured null-PK flag
     */
    private void addInstance(String subject) {
        System.out.println("Add subject as class instance: " + subject);

        if (subject == null) {
            subject = dataNullPkValueFlag;
        }

        // subject is instance of the RdbData class
        Resource resource = createInstanceResource(subject);
        Property typeProperty = ontModel
                .createProperty("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
        Resource dataClass = ontModel.createResource(rdfDataClass);
        ontModel.add(resource, typeProperty, dataClass);

        // Add the actual subject value as a label
        Property labelProperty = ontModel
                .createProperty("http://www.w3.org/2000/01/rdf-schema#label");
        ontModel.add(resource, labelProperty, subject);
    }

    /**
     * Add the RDB data, treating the column data as a literal value
     *
     * @param subject
     *            The subject of the triple; a null is replaced by the
     *            configured null-PK flag
     * @param predicate
     *            The predicate of the triple
     * @param objectLiteral
     *            The object of the triple (treated as a literal); a null is
     *            replaced by the configured null-value flag
     */
    private void addStatement(String subject, String predicate,
            String objectLiteral) {
        if (subject == null) {
            subject = dataNullPkValueFlag;
        }

        if (objectLiteral == null) {
            System.out.println("Null column value for row: PK:" + subject
                    + " Column:" + predicate);
            objectLiteral = dataNullValueFlag;
        }

        // Add the data as a triple
        Resource resource = createInstanceResource(subject);
        Property property = ontModel.createProperty(rdfNamespace
                + makeSafeURIValue(predicate));

        System.out.println("Add statement to model: " + resource + "->"
                + property + "->" + objectLiteral);
        ontModel.add(resource, property, objectLiteral);
    }

    /**
     * Very crude method to create a value that will work as an RDF resource -
     * e.g. removes spaces. If the data contains other special characters then
     * this function will need to be fleshed out.
     *
     * @param value
     *            the value to be made URI-safe (must not be null)
     *
     * @return a URI-safe value (no spaces)
     */
    private static String makeSafeURIValue(String value) {
        return value.replace(' ', '_');
    }

    /**
     * Writes the triples to a data file.
     *
     * The model is written through an OutputStream rather than a Writer so
     * that Jena, not the platform default charset, controls the encoding.
     *
     * @throws RuntimeException
     *             if the file cannot be written or closed
     */
    private void storeModel() {
        FileOutputStream out = null;

        System.out.println("Write loaded data to file, " + outputFileName
                + ", in format: " + outputFormat);

        try {
            out = new FileOutputStream(outputFileName, false);
            ontModel.write(out, outputFormat);
        } catch (IOException ioExc) {
            System.out.println("Unable to write to file: " + outputFileName);
            ioExc.printStackTrace();
            throw new RuntimeException("unable to write output file ("
                    + outputFileName + ")", ioExc);
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException closeExc) {
                    System.out.println("Failed to close output file: "
                            + outputFileName);
                    closeExc.printStackTrace();
                    throw new RuntimeException("Failed to close output file",
                            closeExc);
                }
            }
        }
    }

    /**
     * Get the set of defined ontology file formats that the program can load as
     * a CSV list String
     *
     * @return The known ontology file formats as a CSV list
     */
    public final static String getFormatsAsCSV() {
        return getArrayAsCSV(FORMATS);
    }

    /**
     * Create a CSV list from a String array
     *
     * @param array
     *            An array; null is treated as an empty array
     * @return The array values joined with ", "
     */
    public final static String getArrayAsCSV(String[] array) {
        StringBuilder csv = new StringBuilder();

        if (array == null) {
            return csv.toString();
        }

        // Decide on the separator by position, not by buffer length, so that
        // an empty leading element still gets its separator.
        for (int index = 0; index < array.length; ++index) {
            if (index > 0) {
                csv.append(", ");
            }
            csv.append(array[index]);
        }

        return csv.toString();
    }

    /**
     * Set the input file name, where the ontology is located
     *
     * @param inputFileName
     *            The name of the file containing the ontology
     */
    public void setInputFileName(String inputFileName) {
        this.inputFileName = inputFileName;
    }

    /**
     * Get the input file name for the location of the ontology
     *
     * @return The input file name where the ontology is located
     */
    public String getInputFileName() {
        return inputFileName;
    }

    /**
     * Set the output file name, where the generated triples should be
     * written
     *
     * @param outputFileName
     *            The output file name
     */
    public void setOutputFileName(String outputFileName) {
        this.outputFileName = outputFileName;
    }

    /**
     * Get the output file name for the location of the generated triples
     *
     * @return The output file name
     */
    public String getOutputFileName() {
        return outputFileName;
    }

    /**
     * Create a model with a reasoner set based on the chosen reasoning level.
     *
     * @param reasoningLevel
     *            The reasoning level for this model (one of
     *            REASONING_LEVELS, compared ignoring case)
     *
     * @return The created ontology model
     *
     * @throws IllegalArgumentException
     *             if the reasoning level is not one of the known levels
     */
    private OntModel createModel(String reasoningLevel) {
        int reasoningLevelIndex = getReasoningLevelIndex(reasoningLevel);

        if (reasoningLevelIndex == 0) { // None
            return ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM);
        }

        if (reasoningLevelIndex == 1) { // RDFS
            return ModelFactory
                    .createOntologyModel(OntModelSpec.OWL_DL_MEM_RDFS_INF);
        }

        if (reasoningLevelIndex == 2) { // OWL
            Reasoner reasoner = PelletReasonerFactory.theInstance().create();
            Model infModel = ModelFactory.createInfModel(reasoner,
                    ModelFactory.createDefaultModel());
            return ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM,
                    infModel);
        }

        // Fail here with a clear message instead of returning null and
        // letting the caller hit a NullPointerException later.
        throw new IllegalArgumentException("Unknown reasoning level: "
                + reasoningLevel + ". Expected one of: "
                + getArrayAsCSV(REASONING_LEVELS));
    }

    /**
     * Open the input (ontology) file for reading. If the file cannot be
     * opened the problem is reported and the program exits with status 3.
     *
     * @return An open stream on the input file
     */
    private FileInputStream openInputFile() {
        try {
            return new FileInputStream(inputFileName);
        } catch (FileNotFoundException fnfExc) {
            System.err.println("Failed to open input file: " + inputFileName);
            fnfExc.printStackTrace();
            System.exit(3);

            // Only reached if the exit is prevented (e.g. by a security
            // manager); also satisfies the compiler's flow analysis.
            throw new IllegalStateException("Failed to open input file: "
                    + inputFileName, fnfExc);
        }
    }

    /**
     * Obtain an ontology model set to the chosen reasoning level. Load the
     * ontology file into the model, trying each known format in turn until
     * one of them parses. The successful format is remembered as the output
     * format.
     *
     * @param reasoningLevel
     *            The selected reasoning level
     *
     * @throws IllegalStateException
     *             if the file cannot be parsed in any known format
     * @throws IllegalArgumentException
     *             if the reasoning level is unknown
     */
    private void loadModel(String reasoningLevel) {
        String modelFormat = null;

        for (String format : FORMATS) {
            // A fresh model per attempt so a failed parse leaves no residue.
            // Created before the file is opened so an invalid reasoning
            // level is not misreported as a read error (and leaks nothing).
            OntModel candidate = createModel(reasoningLevel);
            FileInputStream inputStream = openInputFile();

            try {
                // Locale.ROOT: the default-locale form would turn the 'i' of
                // "N-Triples" into a dotted capital under a Turkish locale.
                candidate.read(inputStream, null,
                        format.toUpperCase(Locale.ROOT));
                ontModel = candidate;
                modelFormat = format;
                break;
            } catch (RuntimeException readExc) {
                System.err.println("Error reading file: "
                        + readExc.getClass().getName() + ": as format: "
                        + format + ": " + readExc.getMessage());
            } finally {
                try {
                    inputStream.close();
                } catch (IOException closeExc) {
                    System.err.println("Error closing input file");
                    closeExc.printStackTrace();
                    System.exit(4);
                }
            }
        }

        if (modelFormat == null) {
            throw new IllegalStateException(
                    "The format of the input file cannot be determined.\nTried: "
                            + getFormatsAsCSV());
        }

        System.out.println("Loaded model " + inputFileName
                + " using format: " + modelFormat);

        // Save this so that output will be written in the same format
        outputFormat = modelFormat;
    }

    /**
     * Get the index position of the supplied reasoning level label
     *
     * @param reasonerName
     *            A reasoning level label
     *
     * @return The index position of the reasoning level. Will be equal to the
     *         constant UNKNOWN if the value cannot be found in the collection
     *         of known reasoning levels
     */
    public final static int getReasoningLevelIndex(String reasonerName) {
        return getIndexValue(REASONING_LEVELS, reasonerName);
    }

    /**
     * Find a String value within an array of Strings, ignoring case. Return
     * the index position where the value was first found.
     *
     * @param array
     *            An array of strings to search
     * @param name
     *            The value to find in the array
     *
     * @return The position where the value was found in the array. Will be
     *         equal to the constant UNKNOWN (-1) if the value cannot be
     *         found, or if either argument is null
     */
    public final static int getIndexValue(String[] array, String name) {
        if (array == null || name == null) {
            return UNKNOWN;
        }

        for (int index = 0; index < array.length; ++index) {
            // name is non-null here, so null array elements simply don't match
            if (name.equalsIgnoreCase(array[index])) {
                return index;
            }
        }

        return UNKNOWN;
    }

    /**
     * The execution point for the program. Verifies the input arguments,
     * creates an instance of the RdbToRdf class and creates a thread to run
     * the instance. Both arguments are optional: when the input file name is
     * omitted a default ontology file is used, and when the output file name
     * is omitted it defaults to the input file name with ".out" appended.
     *
     * @param args
     *            The array of input arguments
     */
    public static void main(String[] args) {
        String inputFileName = null;
        String outputFileName = null;

        if (args.length > 2) {
            System.err
                    .println("usage: RdbToRdf [<input file> [<output file>]]");
            System.exit(1);
        }

        if (args.length > 0) {
            inputFileName = args[0].trim();
        }

        if (inputFileName == null || inputFileName.length() == 0) {
            inputFileName = DEFAULT_INPUT_FILE;
            System.out.println("Using default input file: " + inputFileName);
        }

        if (args.length == 2) {
            outputFileName = args[1].trim();
        }

        if (outputFileName == null || outputFileName.length() == 0) {
            outputFileName = inputFileName + ".out";
            System.out.println("Defaulting output file name to: "
                    + outputFileName);
        }

        Runnable runnable = new RdbToRdf(inputFileName, outputFileName);
        new Thread(runnable).start();
    }
}