package com.monead.semantic.education;

/**
 * RdbToRdf - A proof-of-concept (POC) for converting relational
 * data into RDF triples using inferencing
 * 
 * This program uses Jena and Pellet to allow for the creation of
 * a set of RDF triples based on data from a relational database
 * query.
 * 
 * NOTE: THIS PROGRAM IS SOLELY FOR USE AS A PROOF-OF-CONCEPT.  IT
 * PLACES A DATABASE PASSWORD IN PLAINTEXT WITHIN A CONFIGURATION
 * FILE.  THIS IS AN INSECURE PRACTICE THAT SHOULD NOT BE USED FOR
 * AN APPLICATION THAT ACCESSES PRODUCTION DATA.
 * 
 *    Copyright (C) 2010 David S. Read
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU Affero General Public License as
 *    published by the Free Software Foundation, either version 3 of the
 *    License, or (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU Affero General Public License for more details.
 *
 *    You should have received a copy of the GNU Affero General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *    
 *    For information on Jena: http://jena.sourceforge.net/
 *    For information on Pellet: http://clarkparsia.com/pellet
 */

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import javax.management.RuntimeErrorException;

import org.mindswap.pellet.jena.PelletReasonerFactory;

import com.hp.hpl.jena.ontology.Individual;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.ontology.OntModelSpec;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.ModelMaker;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.reasoner.Reasoner;
import com.hp.hpl.jena.util.iterator.ExtendedIterator;

public class RdbToRdf implements Runnable {
	/**
	 * The version identifier
	 */
	public final static String VERSION = "1.0";

	/**
	 * Properties file name (loaded from the working directory)
	 */
	private final static String PROPERTIES_FILE_NAME = "RdbToRdf.properties";
	
	/**
	 * Default namespace for the data loaded from the RDB
	 */
	private final static String DEFAULT_NAMESPACE = "http://monead.com/semantic/education#";

	/**
	 * Default class for all the exported records
	 */
	private final static String DEFAULT_DATA_CLASS = "http://monead.com/semantic/education#RdbData";

	/**
	 * Default value to place in front of PK value when used as an instance
	 * name.
	 * 
	 * This is done in case the value is numeric.
	 * 
	 * Error results from having an instance name beginning with a digit since
	 * URIs must start with an alpha character.
	 */
	private final static String DEFAULT_INSTANCE_NAME_PREFIX = "PK_";

	/**
	 * Data value for PK value if the PK is null - shouldn't happen with true PK columns 
	 */
	private final static String DEFAULT_NULL_PK_VALUE_FLAG = "NULL_PK";
	
	/**
	 * Data value to replace a null column value
	 */
	private final static String DEFAULT_NULL_VALUE_FLAG = "NULL";

	/**
	 * The namespace to use for the generated resources and predicates
	 */
	private String rdfNamespace;
	
	/**
	 * The class of the data instances generated
	 */
	private String rdfDataClass;
	
	/**
	 * The prefix for instance names (since PKs are often numbers)
	 */
	private String rdfInstanceNamePrefix;
	
	/**
	 * The value to insert if a PK column has a null
	 * 
	 * This may happen if the "PK" column is a surrogate
	 * key.
	 */
	private String dataNullPkValueFlag;
	
	/**
	 * The value to insert if a column, other than the chosen
	 * PK column, contains a null.
	 */
	private String dataNullValueFlag;
	
	/**
	 * The set of formats that can be output. These are defined by Jena
	 */
	private final static String[] FORMATS = { "Turtle", "N-Triples", "RDF/XML",
			"N3" };

	/**
	 * The reasoning level to use
	 * 
	 * Must be one of the values in REASONING_LEVELS
	 * 
	 * TODO allow this to be controlled from command line or configuration
	 */
	private static final String reasoningLevel = "owl";

	/**
	 * The output format for the triples
	 * 
	 * This will default to match the input format
	 * 
	 * TODO allow control from command line or configuration
	 */
	private String outputFormat;

	/**
	 * The reasoning levels available.
	 */
	protected final static String[] REASONING_LEVELS = { "none", "rdfs", "owl" };

	/**
	 * A default input (ontology) file to load - in case one is not supplied
	 * on the command line
	 */
	private final static String DEFAULT_INPUT_FILE = "DSRRdbToRdfTestOntology.turtle";

	/**
	 * Constant used if a value cannot be found in an array
	 */
	private final static int UNKNOWN = -1;

	/**
	 * The SQL query whose result set will be converted into triples
	 * (property db.sql)
	 */
	private String dbSql;

	/**
	 * The label (name) of the PK column from the SQL statement
	 * 
	 * The values in this column will represent the subjects of 
	 * the asserted triples
	 */
	private String dbPkColumnLabel;
	
	/**
	 * The database driver class
	 */
	private String dbDriverClass;
	
	/**
	 * The connection string for the database
	 */
	private String dbConnectionURL;

	/**
	 * The user id to use for the database connection
	 */
	private String dbUserId;

	/**
	 * The password to use for the database connection
	 */
	private String dbPassword;

	/**
	 * The name (and path if necessary) to the ontology being loaded
	 */
	private String inputFileName;

	/**
	 * The name (and path if necessary) to the output file for the output
	 * triples
	 */
	private String outputFileName;

	/**
	 * The ontology model holding the loaded ontology and the generated triples
	 */
	private OntModel ontModel;

	/**
	 * Constructor - sets up the input and output file paths and loads the
	 * configuration properties.
	 * 
	 * Fields are assigned directly rather than through the public setters so
	 * that no overridable method is invoked from within the constructor.
	 * 
	 * @param inputFileName
	 *            The name (and optional path) to an ontology
	 * @param outputFileName
	 *            The name (and optional path) for the output
	 */
	public RdbToRdf(String inputFileName, String outputFileName) {
		this.inputFileName = inputFileName;
		this.outputFileName = outputFileName;
		loadProperties();
	}
	
	/**
	 * Load the properties to configure the program.
	 * 
	 * At a minimum the properties file must contain all
	 * of the database (db.*) property values.
	 * 
	 * @throws RuntimeException
	 *             If the properties file cannot be read or a required
	 *             property is missing
	 */
	private void loadProperties() {
		Properties properties;
		FileReader reader;

		properties = new Properties();
		reader = null;

		try {
			reader = new FileReader(PROPERTIES_FILE_NAME);
			properties.load(reader);
		}
		catch (IOException ioExc) {
			System.out.println("Cannot read properties file: " + PROPERTIES_FILE_NAME);
			System.out.println("Full path: " + new File(PROPERTIES_FILE_NAME).getPath());
			ioExc.printStackTrace();
			throw new RuntimeException("Cannot read properties file", ioExc);
		}
		finally {
			// The reader was previously leaked - always release the file handle
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException closeExc) {
					System.out.println("Failed to close properties file: "
							+ PROPERTIES_FILE_NAME);
				}
			}
		}
		
		// Required database configuration - missing values abort the run
		dbDriverClass = mustExist(properties, "db.driverclass");
		dbConnectionURL = mustExist(properties, "db.connectionurl");
		dbUserId = mustExist(properties, "db.userid");
		dbPassword = mustExist(properties, "db.password");
		dbSql = mustExist(properties, "db.sql");
		dbPkColumnLabel = mustExist(properties, "db.pkcolumnlabel");
		
		// Optional RDF/data configuration - defaults applied when absent
		rdfDataClass = properties.getProperty("rdf.dataclass", DEFAULT_DATA_CLASS);
		rdfInstanceNamePrefix = properties.getProperty("rdf.instancenameprefix", DEFAULT_INSTANCE_NAME_PREFIX);
		rdfNamespace = properties.getProperty("rdf.namespace", DEFAULT_NAMESPACE);
		dataNullPkValueFlag = properties.getProperty("data.defaultnullpkflag", DEFAULT_NULL_PK_VALUE_FLAG);
		dataNullValueFlag = properties.getProperty("data.defaultnullflag", DEFAULT_NULL_VALUE_FLAG);
		
		reportConfig();
	}
	
	/**
	 * Display the configuration information based on the command
	 * line, loaded properties and accepted default values.
	 */
	private void reportConfig() {
		// Table of output lines; empty strings produce the blank separator lines
		final String[] configLines = {
				"Loaded Configuration",
				"  Input File: " + inputFileName,
				"  Output File: " + outputFileName,
				"",
				"  Driver Class: " + dbDriverClass,
				"  Connection URL: " + dbConnectionURL,
				"  User Id: " + dbUserId,
				"  PK Column Label: " + dbPkColumnLabel,
				"",
				"  Data Class: " + rdfDataClass,
				"  Instance Name Prefix: " + rdfInstanceNamePrefix,
				"  Namespace: " + rdfNamespace,
				"",
				"  Null PK Value Flag: " + dataNullPkValueFlag,
				"  Null Value Flag: " + dataNullValueFlag,
				"" };

		for (String configLine : configLines) {
			System.out.println(configLine);
		}
	}

	/**
	 * Retrieve a property from a Properties instance.  The property
	 * must exist and have a non-empty value (e.g. it is required).
	 * If the property does not exist, or is blank, an exception 
	 * will be thrown.
	 * 
	 * @param properties The properties instance
	 * @param key the key whose value is being retrieved
	 * 
	 * @return The value associated with the supplied key
	 * 
	 * @throws RuntimeException If the property is missing or blank
	 */
	private static String mustExist(Properties properties, String key) {
		String value;
		
		value = properties.getProperty(key);
		
		// The documented contract requires a non-empty value; previously a
		// blank (whitespace-only) value slipped through the null-only check
		if (value == null || value.trim().length() == 0) {
			throw new RuntimeException("The property " + key + " is required in the properties file (" + PROPERTIES_FILE_NAME + ")");
		}
		
		return value;
	}
	
	/**
	 * Perform the steps to load the ontology, convert the relational data
	 * into triples and write the resulting model to the output file.
	 */
	public void run() {
		System.out.println("Load model with reasoner: " + reasoningLevel);

		// Load the seed ontology with the configured reasoning level
		loadModel(reasoningLevel);
		// Convert the relational query results into triples
		loadModelFromRdb();
		// Persist the combined model
		storeModel();
	}

	/**
	 * Run the configured SQL query and add the resulting rows to the
	 * ontology model as triples.
	 * 
	 * The value in the configured PK column becomes the subject of each
	 * asserted triple; every other column becomes a predicate (named from
	 * its column label) with the column value as a literal object.
	 * 
	 * @throws IllegalStateException
	 *             If the JDBC driver class cannot be found or a database
	 *             error occurs
	 */
	private void loadModelFromRdb() throws IllegalStateException {
		Connection connection;
		java.sql.Statement statement;
		ResultSet resultSet;
		ResultSetMetaData resultSetMetaData;
		int numColumns;

		connection = null;
		statement = null;
		resultSet = null;

		// Create the class for all the exported data
		ontModel.createClass(rdfDataClass);

		// Access the database, run the query and create the triples
		try {
			Class.forName(dbDriverClass);
			connection = DriverManager.getConnection(
					dbConnectionURL, dbUserId, dbPassword);
			statement = connection.createStatement();
			resultSet = statement.executeQuery(dbSql);
			resultSetMetaData = resultSet.getMetaData();
			numColumns = resultSetMetaData.getColumnCount();

			while (resultSet.next()) {
				System.out.println("Row PK: " + dbPkColumnLabel + "="
						+ resultSet.getString(dbPkColumnLabel));
				for (int col = 1; col <= numColumns; ++col) {
					// Compare using the column LABEL (not getColumnName) so
					// that a PK column aliased in the SQL statement is still
					// recognized - predicates are also built from labels
					if (resultSetMetaData.getColumnLabel(col).equals(
							dbPkColumnLabel)) {
						addInstance(resultSet.getString(dbPkColumnLabel));
					} else {
						addStatement(resultSet.getString(dbPkColumnLabel),
								resultSetMetaData.getColumnLabel(col),
								resultSet.getString(col));
					}
				}
			}
		} catch (ClassNotFoundException cnfExc) {
			System.out
					.println("Could not find the db driver class on the classpath: "
							+ dbDriverClass);
			cnfExc.printStackTrace();
			throw new IllegalStateException(
					"DB Driver class not found on classpath", cnfExc);
		} catch (SQLException sqlExc) {
			System.out.println("Error accessing the database (" + dbConnectionURL
					+ "): " + sqlExc.getMessage());
			sqlExc.printStackTrace();
			throw new IllegalStateException("Error accessing the database",
					sqlExc);
		} finally {
			// Close in reverse order of acquisition; failures are reported
			// but must not hide an exception already in flight
			if (resultSet != null) {
				try {
					resultSet.close();
				} catch (Throwable throwable) {
					System.out.println("Failed to close result set");
				}
			}
			if (statement != null) {
				try {
					statement.close();
				} catch (Throwable throwable) {
					System.out.println("Failed to close statement");
				}
			}
			if (connection != null) {
				try {
					connection.close();
				} catch (Throwable throwable) {
					System.out.println("Failed to close connection");
				}
			}
		}
	}

	/**
	 * Add the PK value as a new instance of the data class. The name is
	 * prepended with the configured instance name prefix to prevent issues
	 * with values that start with a digit.
	 * 
	 * This method also adds an RDF label containing the actual value
	 * 
	 * @param subject The subject which will be a class instance
	 */
	private void addInstance(String subject) {
		System.out.println("Add subject as class instance: " + subject);

		// A null PK is replaced with the configured flag value
		if (subject == null) {
			subject = dataNullPkValueFlag;
		}

		final Resource instance = ontModel.createResource(rdfNamespace
				+ rdfInstanceNamePrefix + makeSafeURIValue(subject));

		// Assert rdf:type of the configured data class for this instance
		final Property typeProperty = ontModel
				.createProperty("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
		final Resource dataClass = ontModel.createResource(rdfDataClass);
		ontModel.add(instance, typeProperty, dataClass);

		// Attach the raw subject value as an rdfs:label literal
		final Property labelProperty = ontModel
				.createProperty("http://www.w3.org/2000/01/rdf-schema#label");
		ontModel.add(instance, labelProperty, subject);
	}

	/**
	 * Add the RDB data, treating the column data as a literal value
	 * 
	 * @param subject
	 *            The subject of the triple
	 * @param predicate
	 *            The predicate of the triple
	 * @param objectLiteral
	 *            The object of the triple (treated as a literal)
	 */
	private void addStatement(String subject, String predicate,
			String objectLiteral) {
		// Substitute the configured flags for null PK or column values
		if (subject == null) {
			subject = dataNullPkValueFlag;
		}

		if (objectLiteral == null) {
			System.out.println("Null column value for row: PK:" + subject + " Column:" + predicate);
			objectLiteral = dataNullValueFlag;
		}

		final Resource rowResource = ontModel.createResource(rdfNamespace
				+ rdfInstanceNamePrefix + makeSafeURIValue(subject));
		final Property columnProperty = ontModel.createProperty(rdfNamespace
				+ makeSafeURIValue(predicate));

		System.out.println("Add statement to model: " + rowResource + "->"
				+ columnProperty + "->" + objectLiteral);

		// The column value is asserted as a plain literal
		ontModel.add(rowResource, columnProperty, objectLiteral);
	}

	/**
	 * Very crude method to create a value that will work as an RDF resource -
	 * e.g. removes spaces. If the data contains other special characters then
	 * this function will need to be fleshed out.
	 * 
	 * @param value
	 *            the value to be made URI-safe
	 * 
	 * @return a URI-safe value (no spaces)
	 */
	private static String makeSafeURIValue(String value) {
		// replace() performs a literal substitution - replaceAll() would
		// needlessly compile its argument as a regular expression
		return value.replace(' ', '_');
	}

	/**
	 * Writes the triples to a data file in the previously detected format.
	 * 
	 * @throws RuntimeException
	 *             If the output file cannot be written or closed
	 */
	private void storeModel() {
		FileWriter out;

		out = null;

		System.out.println("Write loaded data to file, " + outputFileName
				+ ", in format: " + outputFormat);

		try {
			out = new FileWriter(outputFileName, false);
			ontModel.write(out, outputFormat);
			// Close inside the try so a flush failure on close still
			// surfaces as a write error
			out.close();
			out = null;
		} catch (IOException ioExc) {
			System.out.println("Unable to write to file: " + outputFileName);
			ioExc.printStackTrace();
			throw new RuntimeException("unable to write output file ("
					+ outputFileName + ")", ioExc);
		} finally {
			if (out != null) {
				// Best-effort close after a failure; throwing here would
				// mask the exception already in flight from the write
				try {
					out.close();
				} catch (Throwable throwable) {
					System.out.println("Failed to close output file: "
							+ outputFileName);
					throwable.printStackTrace();
				}
			}
		}
	}

	/**
	 * Get the set of defined ontology file formats that the program can load as
	 * a CSV list String
	 * 
	 * @return The known ontology file formats as a CSV list
	 */
	public final static String getFormatsAsCSV() {
		final String formatsCSV = getArrayAsCSV(FORMATS);

		return formatsCSV;
	}

	/**
	 * Create a CSV list from a String array
	 * 
	 * @param array
	 *            An array
	 * @return The array values in a CSV list (empty string for an empty array)
	 */
	public final static String getArrayAsCSV(String[] array) {
		StringBuilder csv;

		// StringBuilder avoids the needless synchronization of StringBuffer
		// in this single-threaded method
		csv = new StringBuilder();

		for (String value : array) {
			if (csv.length() > 0) {
				csv.append(", ");
			}
			csv.append(value);
		}

		return csv.toString();
	}

	/**
	 * Set the input file name, where the ontology is located
	 * 
	 * @param inputFileName
	 *            The name of the file containing the ontology
	 */
	public void setInputFileName(String inputFileName) {
		this.inputFileName = inputFileName;
	}

	/**
	 * Get the input file name for the location of the ontology
	 * 
	 * @return The input file name where the ontology is located
	 */
	public String getInputFileName() {
		return inputFileName;
	}

	/**
	 * Set the output file name, where the generated triples should be written
	 * 
	 * @param outputFileName
	 *            The output file name
	 */
	public void setOutputFileName(String outputFileName) {
		this.outputFileName = outputFileName;
	}

	/**
	 * Get the output file name for the location of the generated triples
	 * 
	 * @return The output file name
	 */
	public String getOutputFileName() {
		return outputFileName;
	}

	/**
	 * Create a model with a reasoner set based on the chosen reasoning level.
	 * 
	 * @param reasoningLevel
	 *            The reasoning level for this model (one of REASONING_LEVELS)
	 * 
	 * @return The created ontology model
	 * 
	 * @throws IllegalArgumentException
	 *             If the reasoning level is not recognized
	 */
	private OntModel createModel(String reasoningLevel) {
		OntModel model;
		int reasoningLevelIndex;

		model = null;

		reasoningLevelIndex = getReasoningLevelIndex(reasoningLevel);

		if (reasoningLevelIndex == 0) { // None
			model = ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM);
		} else if (reasoningLevelIndex == 1) { // RDFS
			model = ModelFactory
					.createOntologyModel(OntModelSpec.OWL_DL_MEM_RDFS_INF);
		} else if (reasoningLevelIndex == 2) { // OWL
			Reasoner reasoner = PelletReasonerFactory.theInstance().create();
			Model infModel = ModelFactory.createInfModel(reasoner, ModelFactory
					.createDefaultModel());
			model = ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM,
					infModel);
		} else {
			// Previously an unknown level silently returned null, which
			// surfaced later as an opaque NullPointerException - fail fast
			// with a clear message instead
			throw new IllegalArgumentException("Unknown reasoning level: "
					+ reasoningLevel + " (expected one of: "
					+ getArrayAsCSV(REASONING_LEVELS) + ")");
		}

		return model;
	}

	/**
	 * Obtain an ontology model set to the chosen reasoning level. Load the
	 * ontology file into the model.
	 * 
	 * The known formats are tried in turn until one parses successfully;
	 * the matching format is retained so output is written the same way.
	 * 
	 * @param reasoningLevel
	 *            The selected reasoning level
	 * 
	 * @throws IllegalStateException
	 *             If the input file cannot be parsed with any known format
	 */
	private void loadModel(String reasoningLevel) {
		String modelFormat;

		modelFormat = null;

		for (String format : FORMATS) {
			FileInputStream inputStream = null;
			try {
				inputStream = new FileInputStream(inputFileName);
				ontModel = createModel(reasoningLevel);
				ontModel.read(inputStream, null, format.toUpperCase());
				modelFormat = format;
				break;
			} catch (Throwable throwable) {
				System.err.println("Error reading file: "
						+ throwable.getClass().getName() + ": as format: "
						+ format + ": " + throwable.getMessage());
			} finally {
				// Guard against a null stream (e.g. the file could not be
				// opened) - previously this was an unconditional close that
				// caused a NullPointerException here and an immediate
				// System.exit(4) when the input file was missing
				if (inputStream != null) {
					try {
						inputStream.close();
					} catch (Throwable throwable) {
						System.err.println("Error closing input file");
						throwable.printStackTrace();
						System.exit(4);
					}
				}
			}
		}

		if (modelFormat == null) {
			throw new IllegalStateException(
					"The format of the input file cannot be determined.\nTried: "
							+ getFormatsAsCSV());
		} else {
			System.out.println("Loaded model " + inputFileName
					+ " using format: " + modelFormat);
		}

		// Save this so that output will be written in the same format
		outputFormat = modelFormat;
	}

	/**
	 * Get the index position of the supplied reasoning level label
	 * 
	 * The comparison is case-insensitive (delegates to getIndexValue).
	 * 
	 * @param reasonerName
	 *            A reasoning level label
	 * 
	 * @return The index position of the reasoning level. Will be equal to the
	 *         constant UNKNOWN if the value cannot be found in the collection
	 *         of known reasoning levels
	 */
	public final static int getReasoningLevelIndex(String reasonerName) {
		return getIndexValue(REASONING_LEVELS, reasonerName);
	}

	/**
	 * Find a String value within an array of Strings. Return the index
	 * position where the value was found.
	 * 
	 * The comparison is case-insensitive.
	 * 
	 * @param array
	 *            An array of string to search
	 * @param name
	 *            The value to find in the array
	 * 
	 * @return The position where the value was found in the array. Will be
	 *         equal to the constant UNKNOWN if the value cannot be found in the
	 *         collection of known reasoning levels
	 */
	public final static int getIndexValue(String[] array, String name) {
		for (int index = 0; index < array.length; ++index) {
			// equalsIgnoreCase avoids building two temporary upper-case
			// strings per comparison and is null-tolerant for the argument
			if (array[index].equalsIgnoreCase(name)) {
				return index;
			}
		}

		return UNKNOWN;
	}

	/**
	 * The execution point for the program. Verifies the input arguments have
	 * been supplied, creates an instance of the RdbToRdf class and
	 * creates a thread to run the instance. The program requires an input file
	 * name to be supplied on the command line. An optional output file name may
	 * also be supplied.
	 * 
	 * @param args
	 *            The array of input arguments
	 */
	public static void main(String[] args) {
		String inputFileName = null;
		String outputFileName = null;

		// At most two arguments are accepted: input file and output file
		if (args.length > 2) {
			System.err
					.println("usage: RdbToRdf [<input file> [<output file>]]");
			System.exit(1);
		}

		if (args.length >= 1) {
			inputFileName = args[0].trim();
		}
		if (args.length >= 2) {
			outputFileName = args[1].trim();
		}

		// Fall back to the bundled default ontology when no input is given
		if (inputFileName == null || inputFileName.length() == 0) {
			inputFileName = DEFAULT_INPUT_FILE;
			System.out.println("Using default input file: " + inputFileName);
		}

		// Derive the output file name from the input file when not supplied
		if (outputFileName == null || outputFileName.length() == 0) {
			outputFileName = inputFileName + ".out";
			System.out.println("Defaulting output file name to: "
					+ outputFileName);
		}

		new Thread(new RdbToRdf(inputFileName, outputFileName)).start();
	}
}
