// bergmark - April, 2000 - Reference Linking project

// CreationDatabase.java

// A collection of static routines used for manipulating the creation
// database.  For the unit test, this is a dynamically created database.
// In real life, this will be a database stored as a bunch of files.

// Modifications:
// 2000-07-28:  no more DIDs, use URN as key for fetching creations.
// 2000-10-03:  add functionality to save and reload hashtables
//              Added secondary lookup hashtable, the fileIndex
// 2000-11-27:  Added call to Creation.reconstruct
// 2000-12-08:  Fixed fetchCreation() - if urn is in fileIndex, but
//              the datafiles for its Surrogate are gone, remove it
//              from the fileIndex and return null.
// 2001-03-27:  Reload the database if necessary before fetching a creation
// TBD: arrange for automatic call to save() as well

// 2001-06-23:  Added getDOI() to supply the DOI when a Surrogate is
//              reconstructed.  By: Yu Chen

package Linkable.Utility;

import Linkable.API.Creation;

import java.util.Hashtable;
import java.util.Enumeration;
import java.util.Vector;
import java.io.*;

public class CreationDatabase {

   private static final String ME = "CreationDatabase: ";
   private static final boolean DEBUG = CONFIG.DEBUG;
   private static boolean isLoaded = false;

   private static Hashtable creations = new Hashtable();
   private static Hashtable fileIndex = new Hashtable();

   // fetchCreation -
   /**
    * Looks up the Creation stored under the given URN.
    * <p>
    * The database is loaded from disk first if that has not happened yet.
    * The in-memory <code>creations</code> table is consulted first.  On a
    * miss the <code>fileIndex</code> is checked: if it maps the URN to a DOI
    * whose Surrogate file is still readable, the Creation is rebuilt with
    * <code>Creation.reconstruct</code>; if the Surrogate file is gone, the
    * stale fileIndex entry is removed and the lookup fails.
    * <p>
    * Matching is exact (no trimming, no wildcards).  A caller that wants
    * wildcard behaviour must call this method with a series of URN strings.
    * A reconstructed Creation is returned to the caller but is not added to
    * the <code>creations</code> table by this method.
    *
    * @param urn the URN to look up; <code>null</code> is treated as unknown
    * @return the Creation, or <code>null</code> if the lookup fails
    */
   public static Creation fetchCreation ( String urn ) {
      if ( !isLoaded ) reload();
      if ( DEBUG )
         System.err.println(ME+" fetching creation for urn ->" + urn + "<-");

      // Hashtable rejects null keys with a NullPointerException.  A null
      // URN can never match an entry, so report it as an ordinary miss.
      if ( urn == null ) {
         if ( DEBUG ) System.err.println(ME+"no such urn");
         return null;
      }

      // Hashtable never holds null values, so a null result means "absent".
      Creation cached = (Creation)creations.get(urn);
      if ( cached != null ) return cached;

      String doi = (String)fileIndex.get(urn);
      if ( doi == null ) {
         // This creation is new to us
         if ( DEBUG ) System.err.println(ME+"no such urn");
         return null;
      }

      if ( !existsSurrogate(doi) ) {
         // The index points at data files that no longer exist: forget it.
         if ( DEBUG ) System.err.println(ME+"no data for this doi");
         fileIndex.remove(urn);
         return null;
      }

      if ( DEBUG )
         System.err.println (ME+"reconstructing creation for doi "
            + doi + ", urn " + urn );
      // Note: we do not reconstruct the whole Surrogate, just the Creation
      // in question.  Open question carried over from the original code:
      // should the result also be stashed in the creations table?
      return Creation.reconstruct ( doi, urn );
   }

   // stashCreation -
   /**
    * Adds the specified Creation to the <code>creations</code> hashtable,
    * keyed by its trimmed URN.  The database is loaded first if necessary.
    * <p>
    * What the code below does on a key collision:
    * <ul>
    * <li>If the entry already stored under the URN <code>equals</code> the
    *     new Creation, the method returns.  At that point the table holds
    *     the NEW object, because the initial <code>put</code> has already
    *     replaced the old one.</li>
    * <li>Otherwise the previous entry is put back under the original URN
    *     (that key must stay as is, because it is likely contained in many
    *     other objects) and <code>extendUrn()</code> is called repeatedly on
    *     the new Creation until it can be stored under a URN that was not
    *     in use.</li>
    * <li>If <code>extendUrn()</code> returns false before a free URN is
    *     found, the loop ends and the new Creation is left out of the
    *     table; nothing is reported to the caller.</li>
    * </ul>
    * NOTE(review): earlier versions of this comment described merging the
    * fields of consistent Creations and special handling when the URN is a
    * DOI.  Neither is performed by the statements in this method; confirm
    * whether Creation.equals / Creation.extendUrn cover those cases.
    *
    * @param c the Creation object to be added to the table; must not be
    *          null (it is dereferenced immediately)
    **/

   public static void stashCreation ( Creation c ) {

      if ( !isLoaded ) reload();

      if ( DEBUG )
      System.err.println (ME+"in stashCreation");

      String URN = c.getURN().trim();  // synthesized by this call, if necessary

      // put() returns whatever was stored under URN before, or null.
      Creation prevValue = (Creation)creations.put ( URN, c);
      if ( prevValue == null ) return;  // all is well

      if ( DEBUG )
      System.err.println(ME+"found an old creation at this URN:\n" +
      prevValue.toString());

      // There was a Creation already at this key.  If it is
      // the same creation, all is well.  Just return (the table now
      // holds c rather than prevValue).
      // TBD: we may need to return something that indicates that the
      // input Creation can be set to null (trashed)
      if ( prevValue.equals(c) ) return;

      // There was a Creation at this key, but it was not the same
      // one.  Put back the previous value at the original URN.
      // This put() hands back the value it displaces, i.e. c itself.

      c = (Creation)creations.put ( URN, prevValue );

      // Disambiguate the URN

      // The Creations are inconsistent, so we need to disambiguate the
      // URNs.  Change the URN of the new Creation to something unique.
      // NOTE(review): the original comment assumes the Creation that was
      // already in the table had a synthetic URN; nothing here checks that.
      // The trace line inside the loop is printed regardless of DEBUG.
      while ( c.extendUrn() ) {
	 System.err.println (ME+"trying to stash with URN " + c.getURN() );
	 prevValue = ( Creation)creations.put (c.getURN().trim(), c );
	 if ( prevValue == null ) break;   // all is well
	 // otherwise put it back and extend some more
	 // (the displaced entry is restored under its own getURN(), which is
	 // presumably the key it was just removed from -- confirm)
	 c = (Creation)creations.put ( prevValue.getURN().trim(), prevValue );
      }
   }

   /**
     * dump - Print the current contents of the fileIndex hashtable
     * (URN key and the DOI it maps to, one pair per line) on System.err.
     * The database is loaded first if that has not happened yet.
     * <p>
     * Only the fileIndex is listed.  The former per-Creation listing of the
     * creations hashtable had its print statements disabled, which left a
     * loop that iterated without doing anything; that loop is removed.
     */
   public static void dump () {
      if ( ! isLoaded ) reload();
      System.err.println("\n    Key          Doi ");
      for ( Enumeration e = fileIndex.keys(); e.hasMoreElements(); ) {
         String urn = (String)e.nextElement();
         String doi = (String)fileIndex.get(urn);
         System.err.println ( urn + "       " + doi);
      }
   }

   /**
     * dump - Write the current contents of the creations hashtable to a
     * file as XML: an XML declaration, a <code>creation-database</code> root
     * element, and inside it whatever <code>Creation.toXML("", urn)</code>
     * returns for every entry.
     * <p>
     * Unlike the fileIndex, this saves the data even if the Surrogates
     * don't get saved.  On the other hand the file has to be de-parsed
     * in order to re-construct the creations hashtable.
     * <p>
     * Nothing is written if the database has not been loaded.  Failures are
     * reported on System.err, not thrown.  The output stream is closed
     * on every path, and text is encoded as UTF-8, which is what an XML
     * declaration without an encoding attribute tells a parser to expect.
     *
     * @param fname name of the file to (over)write
     */
   public static void dump (String fname) {
      if ( !isLoaded ) return;
      System.err.println(ME+"dumping the creation database to file " + fname);
      final String charset = "UTF-8";
      FileOutputStream out = null;
      try {
         out = new FileOutputStream(fname);
         String line="<?xml version=\"1.0\" ?>\n<creation-database>\n";
         out.write(line.getBytes(charset));
         for ( Enumeration e = creations.keys(); e.hasMoreElements(); ) {
            String d = (String)e.nextElement();
            Creation c = (Creation)creations.get(d);
            out.write((c.toXML("",d)).getBytes(charset));
         }
         out.write("</creation-database>\n".getBytes(charset));
      } catch (Exception e) {
         System.err.println(ME+"failed to dump the Creation Database."
            +" Exception is " + e.toString() );
      } finally {
         // Close even when a write or toXML() failed, so the file handle
         // is not leaked.
         if ( out != null ) {
            try {
               out.close();
            } catch (IOException e) {
               System.err.println(ME+"failed to dump the Creation Database."
                  +" Exception is " + e.toString() );
            }
         }
      }
   }

   /**
     * save - record in the fileIndex hashtable that the Creation with the
     * given URN is part of the Surrogate with the given DOI.  Called when a
     * Surrogate is being saved to storage.  The database is loaded first if
     * that has not happened yet.
     * <p>
     * The URN is trimmed once and that trimmed form is used both for the
     * "is it there already?" test and as the stored key.  An entry is
     * written when the URN is not yet indexed, or when it is indexed with an
     * empty DOI and the new DOI is non-empty.  An existing non-empty DOI is
     * never overwritten.  Calls with a null URN, a null DOI, or a URN that
     * is empty after trimming are ignored.
     * <p>
     * Assert that this routine is called only if a Surrogate really IS
     * written to storage.
     *
     * @param urn the URN of the creation being saved
     * @param doi the DOI of the Surrogate of which this Creation is part
     */
   public static void save ( String urn, String doi) {
      if ( ! isLoaded ) reload();   // reload() sets isLoaded itself

      // Check data validity
      if ( urn == null || doi == null ) return;
      String key = urn.trim();
      if ( key.equals("") ) return;

      // Hashtable never holds null values, so null here means "not indexed".
      String knownDoi = (String)fileIndex.get(key);
      boolean upgradesEmptyDoi =
         knownDoi != null && knownDoi.equals("") && !doi.equals("");
      if ( knownDoi == null || upgradesEmptyDoi )
         fileIndex.put ( key, doi );
   }

   /**
     * save - write the fileIndex hashtable to the file named by
     * <code>CONFIG.CreationDB</code>.  Each entry is written as two
     * <code>writeUTF</code> strings on an ObjectOutputStream: the URN, then
     * the DOI.  This is the format <code>reload()</code> reads back.
     * <p>
     * Nothing is written if the database has not been loaded.  Every entry
     * is written; the check that a Surrogate actually exists in storage for
     * the DOI is currently disabled.  For failsafe capture of data, use the
     * dump() routine.
     * <p>
     * Failures while writing or closing are reported on System.err, not
     * thrown, and the streams are closed on every path.
     * This routine should be called by applications at the end of the whole
     * run.
     */
   public static void save ( ) {
      if ( !isLoaded ) return;
      String filename = CONFIG.CreationDB;
      System.err.println(ME+"saving the fileIndex hashtable ("
         + fileIndex.size() + "elements) to " + filename);
      int n = 0;
      FileOutputStream fos = null;
      ObjectOutputStream oos = null;
      try {
         fos = new FileOutputStream ( filename );
         oos = new ObjectOutputStream ( fos );
         for ( Enumeration e = fileIndex.keys(); e.hasMoreElements(); ) {
            String urn = (String)e.nextElement();
            String doi = (String)fileIndex.get(urn);
            oos.writeUTF(urn);        // String, the urn
            oos.writeUTF(doi);        // String, the DOI
            n++;
         }
      } catch (Exception e) {
         System.err.println(ME+"caught exception " + e.toString()
            + " while writing indexFile to disk");
      } finally {
         // Closing the object stream flushes buffered records, so a failure
         // here means data was lost and must be reported.
         if ( oos != null ) {
            try {
               oos.close();
            } catch (IOException e) {
               System.err.println(ME+"caught exception " + e.toString()
                  + " while writing indexFile to disk");
            }
         }
         // Covers the case where the object stream could not be created or
         // failed before closing the file; closing twice is harmless.
         if ( fos != null ) {
            try {
               fos.close();
            } catch (IOException ignored) {
               // nothing further can be done; any data loss was reported above
            }
         }
      }
      System.err.println(ME+"wrote " + n + " records to file");
   }

   /**
     * reload - initialize the hashtables from the file named by
     * <code>CONFIG.CreationDB</code>.
     * <p>
     * The file is read as a sequence of records, each consisting of two
     * <code>readUTF</code> strings: the URN, then the DOI of the Surrogate.
     * Every record is entered into the fileIndex hashtable, and the Creation
     * returned by <code>Creation.reconstruct(doi, urn)</code> is entered
     * into the creations hashtable.
     * <p>
     * Reading stops at end of file.  A missing file is treated as an empty
     * database.  Any other failure is reported on System.err and ends the
     * load with whatever was read so far.  The input streams are closed on
     * every path, and <code>isLoaded</code> is set in all cases so the load
     * is not retried on every call.
     */
   public static void reload ( ) {
      String filename = CONFIG.CreationDB;
      if (DEBUG)
         System.err.println(ME+"reloading hashtables from " + filename);
      int n = 0;
      FileInputStream fis = null;
      ObjectInputStream ois = null;
      try {
         fis = new FileInputStream ( filename );
         ois = new ObjectInputStream ( fis );
         while ( true ) {
            // read next record on the input stream
            String thisUrn = ois.readUTF();  // a string
            String thisDOI = ois.readUTF();  // a string
            fileIndex.put ( thisUrn, thisDOI );
            // Hashtable rejects null values; skip the cache entry instead of
            // letting a NullPointerException cut the whole load short.
            Creation rebuilt = Creation.reconstruct(thisDOI, thisUrn);
            if ( rebuilt != null )
               creations.put(thisUrn, rebuilt);
            n++;
         }
      } catch (EOFException endOfRecords) {
         // Expected: this is how the read loop terminates.
      } catch (FileNotFoundException noDatabaseYet) {
         // Expected on a first run: start with empty hashtables.
         if (DEBUG)
            System.err.println(ME+"no saved database at " + filename);
      } catch (Exception e) {
         // Corrupt file, I/O error or a failure inside reconstruct():
         // keep what was loaded, but do not hide the problem.
         System.err.println(ME+"caught exception " + e.toString()
            + " while reloading from " + filename);
      } finally {
         if ( ois != null ) {
            try { ois.close(); } catch (IOException ignored) { /* read-only */ }
         }
         // Covers the case where the object stream could not be created;
         // closing twice is harmless.
         if ( fis != null ) {
            try { fis.close(); } catch (IOException ignored) { /* read-only */ }
         }
      }
      if (DEBUG)
         System.err.println(ME+"\n"+n+" works reloaded from disk\n\n");
      isLoaded = true;
   }

    /**
     * mergeCreation - search the keys of the fileIndex hashtable for a URN
     * that the given (incomplete) URN can be merged with.
     * <p>
     * A URN that starts with '*' (authors missing) matches any key that does
     * not itself start with '*' and that contains the rest of the URN as a
     * substring.  Every other combination is decided by
     * <code>mergable()</code>.  The first matching key encountered is
     * returned; keys are visited in the hashtable's enumeration order.
     *
     * @param urn          the URN that needs to be merged
     * @param authorLength length of the author part of the URN, used to
     *                     locate the author, date and title portions
     * @return the matching key from the fileIndex if a merge is possible,
     *         null otherwise
     */
    public static String mergeCreation(String urn, int authorLength) {
      Enumeration keys = fileIndex.keys();
      while (keys.hasMoreElements()) {
        String candidate = (String) keys.nextElement();

        boolean authorsMissingOnlyInUrn =
            urn.startsWith("*") && !candidate.startsWith("*");
        if (authorsMissingOnlyInUrn) {
          // Match when the candidate contains everything after the '*'.
          if (candidate.indexOf(urn.substring(1)) >= 0) {
            return candidate;
          }
          continue;
        }

        if (mergable(urn, candidate, authorLength)) {
          return candidate;
        }
      }
      return null;
    }

  /**
   * update - re-key the database after a URN merge.  Whatever is stored
   * under the old URN in the creations hashtable and in the fileIndex
   * hashtable is moved to the new URN.  A table that has no entry for the
   * old URN is left untouched.
   *
   * @param _newUrn the URN after the merge
   * @param _oldUrn the old URN that is still in the CreationDatabase
   */
  public static void update (String _newUrn, String _oldUrn) {
    // remove() returns null when the key is absent (the tables never hold
    // null values), so it doubles as the existence test.
    Creation movedCreation = (Creation)creations.remove(_oldUrn);
    if (movedCreation != null) {
      creations.put(_newUrn, movedCreation);
    }

    String movedDoi = (String)fileIndex.remove(_oldUrn);
    if (movedDoi != null) {
      fileIndex.put(_newUrn, movedDoi);
    }
  }


   //----------  PRIVATE ROUTINES ----------------------------
   // Builds a single string of the authors' last names, each followed by
   // three spaces.  A null array yields the empty string.
   private static String dumpAuthors (Author[] authors) {
      if ( authors == null ) {
         return "";
      }
      String lastNames = "";
      int idx = 0;
      while ( idx < authors.length ) {
         lastNames += authors[idx].getLastName() + "   ";
         idx++;
      }
      return lastNames;
   }

   // Tells whether there is actually a readable Surrogate file where the
   // doi says there is, i.e. at <CONFIG.REPOSITORY>/<doi>/Surrogate.
   // Used by fetchCreation() before reconstructing a Creation.
   private static boolean existsSurrogate ( String doi ) {
      final String sep = File.separator;
      File surrogate = new File ( CONFIG.REPOSITORY + sep + doi + sep + "Surrogate" );
      return surrogate.canRead();
   }

   // used to wrap together a string and an integer for the
   // fileIndex hashtable
/*   private static Vector wrap ( String doi, int index ) {
      Vector result = new Vector();
      result.add(doi);
      result.add(new Integer(index));
      return result;
   }
*/
   /**
    * getDOI - look up the DOI recorded in the fileIndex for a URN.
    * @param docURN - the URN of the creation
    * @return the DOI stored for that URN, or null if the URN is not in
    *         the fileIndex
    */
    public static String getDOI (String docURN) {
        // Hashtable.get already yields null for a key that is not present.
        return (String)fileIndex.get(docURN);
    }

    /**
     * mergable - decide whether an incomplete URN (one containing a '*'
     * placeholder) can be completed from a complete URN.
     * <p>
     * The code treats a URN as the author part (<code>authorLength</code>
     * characters), followed by 4 characters of date, followed by the title
     * part.  The position of the '*' decides what is missing:
     * <ul>
     * <li><code>authorLength == 0</code>: authors are missing; the text
     *     after the first character of the incomplete URN must occur
     *     somewhere in the complete URN.</li>
     * <li>'*' inside the author part: everything from
     *     <code>authorLength</code> onward must be equal.</li>
     * <li>'*' where the date should be: the author parts must be equal, and
     *     what follows the '*' must equal what follows the 4-character date
     *     in the complete URN.</li>
     * <li>'*' after the date: the author and date parts must be equal.</li>
     * </ul>
     * Comparisons ignore case.  A URN that is too short for the requested
     * comparison, or a negative <code>authorLength</code>, yields false
     * instead of an exception.
     *
     * @param urnIncomplete the URN containing '*'
     * @param urnComplete   the candidate URN to merge with
     * @param authorLength  length of the author part of the URNs
     * @return true if the two URNs can be merged
     */
    private static boolean mergable (String urnIncomplete, String urnComplete, int authorLength) {
      final int dateLength = 4;
      int indexOfStar = urnIncomplete.indexOf('*');
      if (indexOfStar == -1 || urnComplete.startsWith("*") || authorLength < 0)
        return false;

      if (authorLength == 0) {
        // the urnIncomplete misses the author
        return urnComplete.indexOf(urnIncomplete.substring(1)) != -1;
      }

      int dateEnd = authorLength + dateLength;
      if (indexOfStar < authorLength) {
        // need to fill in authors
        return tailsMatch(urnIncomplete, authorLength, urnComplete, authorLength);
      }
      if (indexOfStar < dateEnd) {
        // need to fill in Date: the single '*' stands in for the whole date,
        // so the title starts one character later in the incomplete URN and
        // dateLength characters later in the complete one.
        return headsMatch(urnIncomplete, urnComplete, authorLength)
            && tailsMatch(urnIncomplete, authorLength + 1, urnComplete, dateEnd);
      }
      // need to fill in title
      return headsMatch(urnIncomplete, urnComplete, dateEnd);
    }

    // True when a and b agree, ignoring case, on their first len characters;
    // false when either string is shorter than len.
    private static boolean headsMatch (String a, String b, int len) {
      return len >= 0 && a.regionMatches(true, 0, b, 0, len);
    }

    // True when a from index aFrom to its end equals, ignoring case, b from
    // index bFrom to its end; false when either start index is out of range.
    private static boolean tailsMatch (String a, int aFrom, String b, int bFrom) {
      int remaining = a.length() - aFrom;
      if (aFrom < 0 || bFrom < 0 || remaining < 0
          || remaining != b.length() - bFrom)
        return false;
      return a.regionMatches(true, aFrom, b, bFrom, remaining);
    }
  }
