package com.monead.semantic.education;
/**
* RdbToRdf - A proof-of-concept (POC) for converting relational
* data into RDF triples using inferencing
*
* This program uses Jena and Pellet to allow for the creation of
* a set of RDF triples based on data from a relational database
* query.
*
* NOTE: THIS PROGRAM IS SOLELY FOR USE AS A PROOF-OF-CONCEPT. IT
* PLACES A DATABASE PASSWORD IN PLAINTEXT WITHIN A CONFIGURATION
* FILE. THIS IS AN INSECURE PRACTICE THAT SHOULD NOT BE USED FOR
* AN APPLICATION THAT ACCESSES PRODUCTION DATA.
*
* Copyright (C) 2010 David S. Read
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* For information on Jena: http://jena.sourceforge.net/
* For information on Pellet: http://clarkparsia.com/pellet
*/
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import javax.management.RuntimeErrorException;
import org.mindswap.pellet.jena.PelletReasonerFactory;
import com.hp.hpl.jena.ontology.Individual;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.ontology.OntModelSpec;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.ModelMaker;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.reasoner.Reasoner;
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
public class RdbToRdf implements Runnable {
/**
* The version identifier
*/
public final static String VERSION = "1.0";
/**
* Properties file
*/
private final static String PROPERTIES_FILE_NAME = "RdbToRdf.properties";
/**
* Default namespace for the data loaded from the RDB
*/
private final static String DEFAULT_NAMESPACE = "http://monead.com/semantic/education#";
/**
* Default class for all the exported records
*/
private final static String DEFAULT_DATA_CLASS = "http://monead.com/semantic/education#RdbData";
/**
* Default value to place in front of PK value when used as an instance
* name.
*
* This is done in case the value is numeric.
*
* Error results from having an instance name beginning with a digit since
* URIs must start with an alpha character.
*/
private final static String DEFAULT_INSTANCE_NAME_PREFIX = "PK_";
/**
* Data value for PK value if the PK is null - shouldn't happen with true PK columns
*/
private final static String DEFAULT_NULL_PK_VALUE_FLAG = "NULL_PK";
/**
* Data value to replace a null column value
*/
private final static String DEFAULT_NULL_VALUE_FLAG = "NULL";
/**
* The namespace to use for the generated resources and predicates
*/
private String rdfNamespace;
/**
* The class of the data instances generated
*/
private String rdfDataClass;
/**
* The prefix for instance names (since PKs are often numbers)
*/
private String rdfInstanceNamePrefix;
/**
* The value to insert if a PK column has a null
*
* This may happen if the "PK" column is a surrogate
* key.
*/
private String dataNullPkValueFlag;
/**
* The value to insert if a column, other than the chosen
* PK column, contains a null.
*/
private String dataNullValueFlag;
/**
* The set of formats that can be output. These are defined by Jena
*/
private final static String[] FORMATS = { "Turtle", "N-Triples", "RDF/XML",
"N3" };
/**
* The reasoning level to use
*
* TODO allow this to be controlled from command line or configuration
*/
private static final String reasoningLevel = "owl";
/**
* The output format for the triples
*
* This will default to match the input format
*
* TODO allow control from command line or configuration
*/
private String outputFormat;
/**
* The reasoning levels available.
*/
protected final static String[] REASONING_LEVELS = { "none", "rdfs", "owl" };
/**
* A default input file name - in case one is not supplied on the command line
*/
private final static String DEFAULT_INPUT_FILE = "DSRRdbToRdfTestOntology.turtle";
/**
* Constant used if a value cannot be found in an array
*/
private final static int UNKNOWN = -1;
/**
* The SQL query whose result rows are converted into triples
*/
private String dbSql;
/**
* The label (name) of the PK column from the SQL statement
*
* The values in this column will represent the subjects of
* the asserted triples
*/
private String dbPkColumnLabel;
/**
* The database driver class
*/
private String dbDriverClass;
/**
* The connection string for the database
*/
private String dbConnectionURL;
/**
* The user id to use for the database connection
*/
private String dbUserId;
/**
* The password to use for the database connection
*/
private String dbPassword;
/**
* The name (and path if necessary) to the ontology being loaded
*/
private String inputFileName;
/**
* The name (and path if necessary) to the output file for the output
* triples
*/
private String outputFileName;
/**
* The loaded ontology
*/
private OntModel ontModel;
/**
 * Constructor - records the input and output file paths and then loads the
 * program configuration from the properties file.
 *
 * The file names are assigned to the fields directly rather than through the
 * public setters, so that a subclass overriding a setter is never invoked on
 * a partially constructed instance.
 *
 * @param inputFileName
 *            The name (and optional path) to an ontology
 * @param outputFileName
 *            The name (and optional path) for the output
 *
 * @throws RuntimeException
 *             if the properties file cannot be read or a required property
 *             is missing from it
 */
public RdbToRdf(String inputFileName, String outputFileName) {
    this.inputFileName = inputFileName;
    this.outputFileName = outputFileName;
    loadProperties();
}
/**
 * Load the properties to configure the program.
 *
 * The required database properties are db.driverclass, db.connectionurl,
 * db.userid, db.password, db.sql and db.pkcolumnlabel. The rdf.dataclass,
 * rdf.instancenameprefix, rdf.namespace, data.defaultnullpkflag and
 * data.defaultnullflag properties are optional and fall back to the
 * DEFAULT_* constants. The resulting configuration is printed once loaded.
 *
 * @throws RuntimeException
 *             if the properties file cannot be read or a required property
 *             is missing
 */
private void loadProperties() {
    Properties properties = new Properties();
    FileReader reader = null;

    try {
        reader = new FileReader(PROPERTIES_FILE_NAME);
        properties.load(reader);
    } catch (IOException ioExc) {
        System.out.println("Cannot read properties file: " + PROPERTIES_FILE_NAME);
        // The file name is relative, so report the absolute location that was tried
        System.out.println("Full path: " + new File(PROPERTIES_FILE_NAME).getAbsolutePath());
        ioExc.printStackTrace();
        throw new RuntimeException("Cannot read properties file", ioExc);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException closeExc) {
                // Best effort: either the properties are already loaded or
                // the read failure has already been reported above
                System.out.println("Failed to close properties file: " + PROPERTIES_FILE_NAME);
            }
        }
    }

    // Required settings - no sensible defaults exist for these
    dbDriverClass = mustExist(properties, "db.driverclass");
    dbConnectionURL = mustExist(properties, "db.connectionurl");
    dbUserId = mustExist(properties, "db.userid");
    dbPassword = mustExist(properties, "db.password");
    dbSql = mustExist(properties, "db.sql");
    dbPkColumnLabel = mustExist(properties, "db.pkcolumnlabel");

    // Optional settings
    rdfDataClass = properties.getProperty("rdf.dataclass", DEFAULT_DATA_CLASS);
    rdfInstanceNamePrefix = properties.getProperty("rdf.instancenameprefix", DEFAULT_INSTANCE_NAME_PREFIX);
    rdfNamespace = properties.getProperty("rdf.namespace", DEFAULT_NAMESPACE);
    dataNullPkValueFlag = properties.getProperty("data.defaultnullpkflag", DEFAULT_NULL_PK_VALUE_FLAG);
    dataNullValueFlag = properties.getProperty("data.defaultnullflag", DEFAULT_NULL_VALUE_FLAG);

    reportConfig();
}
/**
 * Print the effective configuration to standard output: the input and
 * output file names, the database connection settings, the RDF naming
 * settings and the null-replacement flags.
 *
 * The database password is not printed.
 */
private void reportConfig() {
    final java.io.PrintStream out = System.out;

    out.println("Loaded Configuration");

    // Files
    out.println(" Input File: " + inputFileName);
    out.println(" Output File: " + outputFileName);
    out.println();

    // Database
    out.println(" Driver Class: " + dbDriverClass);
    out.println(" Connection URL: " + dbConnectionURL);
    out.println(" User Id: " + dbUserId);
    out.println(" PK Column Label: " + dbPkColumnLabel);
    out.println();

    // RDF naming
    out.println(" Data Class: " + rdfDataClass);
    out.println(" Instance Name Prefix: " + rdfInstanceNamePrefix);
    out.println(" Namespace: " + rdfNamespace);
    out.println();

    // Null handling
    out.println(" Null PK Value Flag: " + dataNullPkValueFlag);
    out.println(" Null Value Flag: " + dataNullValueFlag);
    out.println();
}
/**
 * Retrieve a required property from a Properties instance.
 *
 * Only the presence of the key is checked: a key that is present with an
 * empty value is returned as an empty string, while a missing key causes
 * an exception.
 *
 * @param properties The properties instance
 * @param key the key whose value is being retrieved
 *
 * @return The value associated with the supplied key (never null)
 *
 * @throws RuntimeException if the key is not present in the properties
 */
private static String mustExist(Properties properties, String key) {
    final String found = properties.getProperty(key);

    if (found != null) {
        return found;
    }

    throw new RuntimeException("The property " + key + " is required in the properties file (" + PROPERTIES_FILE_NAME + ")");
}
/**
 * Perform the conversion: load the ontology using the configured reasoning
 * level, add the triples produced from the database query and then write
 * the resulting model to the output file.
 */
public void run() {
    final String level = reasoningLevel;

    System.out.println("Load model with reasoner: " + level);
    loadModel(level);

    loadModelFromRdb();

    storeModel();
}
/**
 * Run the configured SQL query and assert its rows into the model.
 *
 * The data class is created in the model first. For each row the value of
 * the PK column is used as the subject: the PK column itself produces a
 * class instance (see addInstance) and every other column produces a
 * literal-valued statement whose predicate is the column label (see
 * addStatement).
 *
 * The PK column is recognised by its column label, compared without regard
 * to case, which matches how the PK value itself is fetched from the result
 * set by label. The PK value is read once per row and reused.
 *
 * @throws IllegalStateException
 *             if the driver class cannot be found or a database error
 *             occurs
 */
private void loadModelFromRdb() throws IllegalStateException {
    Connection connection = null;
    java.sql.Statement statement = null;
    ResultSet resultSet = null;

    // Create the class for all the exported data
    ontModel.createClass(rdfDataClass);

    // Access the database, run the query and create the triples
    try {
        Class.forName(dbDriverClass);
        connection = DriverManager.getConnection(dbConnectionURL, dbUserId, dbPassword);
        statement = connection.createStatement();
        resultSet = statement.executeQuery(dbSql);

        ResultSetMetaData resultSetMetaData = resultSet.getMetaData();
        int numColumns = resultSetMetaData.getColumnCount();

        // Column labels do not change between rows, so fetch them once
        // (index 0 is unused because JDBC columns are 1-based)
        String[] columnLabels = new String[numColumns + 1];
        for (int col = 1; col <= numColumns; ++col) {
            columnLabels[col] = resultSetMetaData.getColumnLabel(col);
        }

        while (resultSet.next()) {
            String pkValue = resultSet.getString(dbPkColumnLabel);
            System.out.println("Row PK: " + dbPkColumnLabel + "=" + pkValue);

            for (int col = 1; col <= numColumns; ++col) {
                if (dbPkColumnLabel.equalsIgnoreCase(columnLabels[col])) {
                    addInstance(pkValue);
                } else {
                    addStatement(pkValue, columnLabels[col], resultSet.getString(col));
                }
            }
        }
    } catch (ClassNotFoundException cnfExc) {
        System.out
                .println("Could not find the db driver class on the classpath: "
                        + dbDriverClass);
        cnfExc.printStackTrace();
        throw new IllegalStateException(
                "DB Driver class not found on classpath", cnfExc);
    } catch (SQLException sqlExc) {
        System.out.println("Error acessing the database (" + dbConnectionURL
                + "): " + sqlExc.getMessage());
        sqlExc.printStackTrace();
        throw new IllegalStateException("Error accessing the database",
                sqlExc);
    } finally {
        // Release the JDBC resources in reverse order of acquisition.
        // Failures here are only reported so that they cannot hide an
        // exception raised while reading the data.
        if (resultSet != null) {
            try {
                resultSet.close();
            } catch (Throwable throwable) {
                System.out.println("Failed to close result set");
            }
        }
        if (statement != null) {
            try {
                statement.close();
            } catch (Throwable throwable) {
                System.out.println("Failed to close statement");
            }
        }
        if (connection != null) {
            try {
                connection.close();
            } catch (Throwable throwable) {
                System.out.println("Failed to close connection");
            }
        }
    }
}
/**
 * Add the PK value as a new instance of the data class. The instance URI
 * is the namespace, followed by the configured instance name prefix,
 * followed by the URI-safe form of the PK value. A null PK value is
 * replaced by the configured null PK flag.
 *
 * This method also adds an RDFS label containing the actual value.
 *
 * @param subject The PK value that becomes the class instance (may be null)
 */
private void addInstance(String subject) {
    System.out.println("Add subject as class instance: " + subject);

    final String pkValue = (subject == null) ? dataNullPkValueFlag : subject;

    // <instance> rdf:type <data class>
    final Resource instance = ontModel.createResource(
            rdfNamespace + rdfInstanceNamePrefix + makeSafeURIValue(pkValue));
    final Property rdfType = ontModel.createProperty(
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
    final Resource dataClass = ontModel.createResource(rdfDataClass);
    ontModel.add(instance, rdfType, dataClass);

    // <instance> rdfs:label "<unmodified PK value>"
    final Property rdfsLabel = ontModel.createProperty(
            "http://www.w3.org/2000/01/rdf-schema#label");
    ontModel.add(instance, rdfsLabel, pkValue);
}
/**
 * Add one column value from the RDB as a triple whose object is a literal.
 *
 * The subject URI is built the same way as for the class instance
 * (namespace + instance name prefix + URI-safe PK value) and the predicate
 * URI is the namespace followed by the URI-safe predicate name. Null
 * subjects and null column values are replaced by their configured flags.
 *
 * @param subject
 *            The PK value identifying the subject of the triple
 * @param predicate
 *            The name used to form the predicate of the triple
 * @param objectLiteral
 *            The object of the triple (treated as a literal)
 */
private void addStatement(String subject, String predicate,
        String objectLiteral) {
    final String pkValue = (subject != null) ? subject : dataNullPkValueFlag;

    String literal = objectLiteral;
    if (literal == null) {
        System.out.println("Null column value for row: PK:" + pkValue + " Column:" + predicate);
        literal = dataNullValueFlag;
    }

    // Build the subject and predicate, then assert the triple
    final String subjectUri = rdfNamespace + rdfInstanceNamePrefix + makeSafeURIValue(pkValue);
    final Resource rowResource = ontModel.createResource(subjectUri);

    final String predicateUri = rdfNamespace + makeSafeURIValue(predicate);
    final Property columnProperty = ontModel.createProperty(predicateUri);

    System.out.println("Add statement to model: " + rowResource + "->"
            + columnProperty + "->" + literal);
    ontModel.add(rowResource, columnProperty, literal);
}
/**
 * Very crude conversion of a value into something usable inside an RDF
 * resource URI: every space is replaced with an underscore and all other
 * characters are left untouched. If the data contains other special
 * characters then this function will need to be fleshed out.
 *
 * @param value
 *            the value to be made URI-safe (must not be null)
 *
 * @return the value with each space replaced by an underscore
 */
private static String makeSafeURIValue(String value) {
    final String withoutSpaces = value.replace(' ', '_');
    return withoutSpaces;
}
/**
 * Writes the triples to the output file, replacing any existing content,
 * using the format recorded when the ontology was loaded.
 *
 * The model is written to a byte stream rather than a FileWriter so that
 * the character encoding is chosen by the Jena writer instead of the
 * platform default charset.
 *
 * @throws RuntimeException
 *             if the output file cannot be opened, or cannot be closed
 *             after a successful write
 */
private void storeModel() {
    FileOutputStream out = null;
    boolean written = false;

    System.out.println("Write loaded data to file, " + outputFileName
            + ", in format: " + outputFormat);

    try {
        out = new FileOutputStream(outputFileName, false);
        ontModel.write(out, outputFormat);
        written = true;
    } catch (IOException ioExc) {
        System.out.println("Unable to write to file: " + outputFileName);
        ioExc.printStackTrace();
        throw new RuntimeException("unable to write output file ("
                + outputFileName + ")", ioExc);
    } finally {
        if (out != null) {
            try {
                out.close();
            } catch (IOException closeExc) {
                System.out.println("Failed to close output file: "
                        + outputFileName);
                closeExc.printStackTrace();
                // Only escalate when the write itself succeeded; otherwise
                // this would replace the exception that describes the
                // original failure
                if (written) {
                    throw new RuntimeException("Failed to close output file",
                            closeExc);
                }
            }
        }
    }
}
/**
 * Get the ontology file formats known to the program (the FORMATS array)
 * as a single comma-separated String.
 *
 * @return The known ontology file formats as a CSV list
 */
public final static String getFormatsAsCSV() {
    final String formatList = getArrayAsCSV(FORMATS);
    return formatList;
}
/**
 * Create a CSV list from a String array. Values are joined with ", " in
 * array order; no quoting or escaping is applied.
 *
 * The separator is placed by position rather than by checking whether any
 * text has been written yet, so empty values at the start of the array
 * still get their separators.
 *
 * @param array
 *            An array (must not be null)
 * @return The array values in a CSV list, or an empty String for an empty
 *         array
 */
public final static String getArrayAsCSV(String[] array) {
    StringBuilder csv = new StringBuilder();

    for (int index = 0; index < array.length; ++index) {
        if (index > 0) {
            csv.append(", ");
        }
        csv.append(array[index]);
    }

    return csv.toString();
}
/**
 * Record the name of the file from which the ontology is read.
 *
 * @param inputFileName
 *            The name (and path if necessary) of the ontology file
 */
public void setInputFileName(final String inputFileName) {
    this.inputFileName = inputFileName;
}
/**
 * Get the name of the file from which the ontology is read.
 *
 * @return The ontology input file name
 */
public String getInputFileName() {
    return this.inputFileName;
}
/**
 * Record the name of the file to which the output triples are written.
 *
 * @param outputFileName
 *            The name (and path if necessary) of the output file
 */
public void setOutputFileName(final String outputFileName) {
    this.outputFileName = outputFileName;
}
/**
 * Get the name of the file to which the output triples are written.
 *
 * @return The output file name
 */
public String getOutputFileName() {
    return this.outputFileName;
}
/**
 * Create a model with a reasoner set based on the chosen reasoning level.
 *
 * The level is matched against REASONING_LEVELS and the resulting index
 * selects the model, so the cases below depend on the order of that array
 * (none, rdfs, owl).
 *
 * @param reasoningLevel
 *            The reasoning level for this model
 *
 * @return The created ontology model (never null)
 *
 * @throws IllegalArgumentException
 *             if the reasoning level is not one of REASONING_LEVELS
 */
private OntModel createModel(String reasoningLevel) {
    switch (getReasoningLevelIndex(reasoningLevel)) {
    case 0: // None
        return ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM);
    case 1: // RDFS
        return ModelFactory
                .createOntologyModel(OntModelSpec.OWL_DL_MEM_RDFS_INF);
    case 2: { // OWL - Pellet reasoner wrapped around an empty base model
        Reasoner reasoner = PelletReasonerFactory.theInstance().create();
        Model infModel = ModelFactory.createInfModel(reasoner, ModelFactory
                .createDefaultModel());
        return ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM,
                infModel);
    }
    default:
        // Fail here with a clear message instead of handing back null and
        // letting the caller hit an unrelated NullPointerException
        throw new IllegalArgumentException("Unknown reasoning level: "
                + reasoningLevel + " (expected one of: "
                + getArrayAsCSV(REASONING_LEVELS) + ")");
    }
}
/**
 * Obtain an ontology model set to the chosen reasoning level and load the
 * ontology file into it.
 *
 * The file format is not known in advance, so each entry of FORMATS is
 * tried in turn against a freshly created model until one parses. The
 * format that succeeds is remembered as the output format.
 *
 * The process is terminated with exit status 3 if the input file cannot be
 * opened, and with exit status 4 if the input file cannot be closed.
 *
 * @param reasoningLevel
 *            The selected reasoning level
 *
 * @throws IllegalStateException
 *             if the file cannot be parsed in any of the known formats
 */
private void loadModel(String reasoningLevel) {
    String modelFormat;

    // Confirm the file can be opened before trying the formats, so that a
    // missing or unreadable file is reported as such rather than as a
    // series of parse failures
    try {
        FileInputStream probe = new FileInputStream(inputFileName);
        probe.close();
    } catch (Throwable throwable) {
        System.err.println("Failed to open input file: " + inputFileName);
        throwable.printStackTrace();
        System.exit(3);
    }

    modelFormat = null;

    for (String format : FORMATS) {
        // A new stream per attempt: each parser must start at the
        // beginning of the file
        FileInputStream inputStream = null;

        try {
            inputStream = new FileInputStream(inputFileName);
            ontModel = createModel(reasoningLevel);
            // Fixed locale so the format name is upper-cased the same way
            // regardless of the platform default locale
            ontModel.read(inputStream, null,
                    format.toUpperCase(java.util.Locale.ENGLISH));
            modelFormat = format;
            break;
        } catch (Throwable throwable) {
            // NOTE(review): Throwable is kept from the original;
            // presumably some parsers signal bad input with Error
            // subclasses - confirm before narrowing
            System.err.println("Error reading file: "
                    + throwable.getClass().getName() + ": as format: "
                    + format + ": " + throwable.getMessage());
        } finally {
            // The stream is null when the open itself failed
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (Throwable throwable) {
                    System.err.println("Error closing input file");
                    throwable.printStackTrace();
                    System.exit(4);
                }
            }
        }
    }

    if (modelFormat == null) {
        throw new IllegalStateException(
                "The format of the input file cannot be determined.\nTried: "
                        + getFormatsAsCSV());
    } else {
        System.out.println("Loaded model " + inputFileName
                + " using format: " + modelFormat);
    }

    // Save this so that output will be written in the same format
    outputFormat = modelFormat;
}
/**
 * Get the index position of the supplied reasoning level label within
 * REASONING_LEVELS.
 *
 * @param reasonerName
 *            A reasoning level label
 *
 * @return The index position of the reasoning level. Will be equal to the
 *         constant UNKNOWN if the value cannot be found in the collection
 *         of known reasoning levels
 */
public final static int getReasoningLevelIndex(String reasonerName) {
    final int position = getIndexValue(REASONING_LEVELS, reasonerName);
    return position;
}
/**
 * Find a String value within an array of Strings, ignoring case. Return
 * the index position of the first match.
 *
 * The comparison uses equalsIgnoreCase, so it does not depend on the
 * default locale, and a null name or null array element simply does not
 * match.
 *
 * @param array
 *            An array of strings to search (must not be null)
 * @param name
 *            The value to find in the array
 *
 * @return The position where the value was first found in the array. Will
 *         be equal to the constant UNKNOWN if the value cannot be found
 */
public final static int getIndexValue(String[] array, String name) {
    for (int index = 0; index < array.length; ++index) {
        if (array[index] != null && array[index].equalsIgnoreCase(name)) {
            return index;
        }
    }

    return UNKNOWN;
}
/**
* The execution point for the program. Verifies the input arguments have
* been supplied, creates an instance of the RdbToRdf class and
* creates a thread to run the instance. The program requires an input file
* name to be supplied on the command line. An optional output file name may
* also be supplied.
*
* @param args
* The array of input arguments
*/
public static void main(String[] args) {
Runnable runnable;
int argNum;
String inputFileName = null;
String outputFileName = null;
if (args.length > 2) {
System.err
.println("usage: RdbToRdf [ [