// bergmark - April 2000 - The Reference Linking Project

package Linkable.API;

// Creation is the generic class containing information about a work.
// The fields are filled in on a best-effort basis.  The CreationDatabase
// holds information about the creations, and the fields become more
// complete as information is discovered.
// Modifications: 2000-04-25: replace MIMEfile text with individual fields
// (This raises the question of whether Author shouldn't also be part of the API)
//   2000-07-28: No more DIDs.  No more urn types.  All urns are synthetic.
//               MIMEfile is now just a string.
//   2000-09-28: New call to GenXML to get Dublin Core
//   2000-10-03: New call to CreationDatabase to update the index List
//   2000-11-21: New call to static reconstruct () routine, reduce multiple
//               asterisks to one
//   2000-12-08: Add checks for multiple URLs to setDisplayID
//   2000-12-14: Add "getFirstAuthorLastName()" routine
//   2001-03-29: Initialize date to "" to avoid null pointer exceptions
//   2001-03-29: doWork should handle http values, too.
//   2001-04-04: doWork should resurrect titles without newlines in them

// Note: there should be exactly one Creation object per work.
// A work always has a URN synthesized from the work's metadata.
// A work might correspond to more than one DOI (e.g. if there are several
// copies of this work in different archives).
// We always strive to have a single Creation object for a work, however.

// Invariant: if a Creation has no URN, then it is not in the Creation
// Database and all attempts to set any fields are automatically consistent.

// TBD:  All the "set" functions should check for consistency and return
// a boolean result: true for consistent and false for not.  Or is this
// better left entirely as a database issue?

// TBD:  Arrange for a way for the Surrogate to pass in a DOI for
// this creation, if one is available

import Linkable.Utility.*;

import java.util.Vector;
import java.io.File;

// needs xml-tr2/xml.jar in $CLASSPATH
import org.w3c.dom.*;

/**
 * The Creation object represents a single work, in the abstract sense.
 * It contains the work's title, urn, authors, publication date, urls
 * if any are known, and its DOI, if known.  The database key is the
 * urn.
 */
public class Creation {

   // Prefix placed on the diagnostic messages this class writes to System.err
   public static final String ME = "Creation: ";
   // Copied from CONFIG.DEBUG; guards most (not all) of the diagnostic output
   public static boolean DEBUG = CONFIG.DEBUG;

   private String myURN;            // URN for this creation
   private String title;            // Title of this creation
   private Author[] authors=null;   // Author(s) of this creation
   private String date="";          // Creation Date (yyyy-mm-dd format); "" until set
   // true once myURN has been supplied (constructor, setUrn) or synthesized (getURN)
   private boolean hasURN = false;
   private Vector displayID;        // archive item addresses, if known
   private String doi;              // DOI for Creation, if Surrogate
   // Incremented by every call to extendUrn()
   public int iExtended = 0;


   /**
    * Creates an empty Creation.  No URN is attached yet; one is
    * synthesized from the metadata on the first call to getURN().
    */
   public Creation() {
      this.hasURN = false;
   }

   /**
    * Creates a Creation initialized to a particular URN.
    * @param docURN String giving the document's URN.  If it is null the
    *        Creation is treated as having no URN yet, so that getURN()
    *        synthesizes one instead of handing back null.
    */
   public Creation ( String docURN ) {
      myURN = docURN;
      // Only claim to have a URN when one was actually supplied.
      hasURN = ( docURN != null );
   }

   // accessor functions for populating the Creation object

   /**
    * Adds a url to this work's display list.  A string that holds several
    * urls run together (for example "http://a and http://b") is split at
    * every "http:"; each piece is added separately, the later pieces
    * first.  Surrounding whitespace is removed, as are the connectors
    * ("and", "," and ";") left dangling at the end of a split-off piece.
    * A url that is already in the list is not added again.
    * @param url the url, as a String http://...  A null or empty value
    *        is ignored.
    */
   public void setDisplayID ( String url ) {
      if ( url == null || url.equals("") ) {
         if (DEBUG)
            System.err.println(ME+"?? asked to add an empty displayID!");
         return;
      }
      // vet the url - just one http: per url
      int i = url.indexOf("http:");
      int j = url.indexOf("http:",i+4);
      boolean wasSplit = ( j != -1 );
      if ( wasSplit ) {
         // everything from the second "http:" onwards is handled recursively
         this.setDisplayID( url.substring(j) );
         url = url.substring(0,j);
      }
      url = url.trim();
      if ( wasSplit ) {
         // remove whatever connected this url to the next one
         boolean stripped = true;
         while ( stripped ) {
            stripped = false;
            if ( url.endsWith(",") || url.endsWith(";") ) {
               url = url.substring(0,url.length()-1).trim();
               stripped = true;
            } else if ( url.endsWith(" and") ) {
               url = url.substring(0,url.length()-4).trim();
               stripped = true;
            }
         }
      }
      if ( url.equals("") ) {
         // nothing but whitespace/connectors was left
         if (DEBUG)
            System.err.println(ME+"?? asked to add an empty displayID!");
         return;
      }
      if (displayID == null) {
         displayID = new Vector(); displayID.add( url );
      } else if ( !displayID.contains(url) )
         displayID.add( url );
      if (DEBUG) System.err.println(ME+"in setDisplayID with\n"
         +this + "added " + url + " as an identifier");
   }         // setDisplayID

   /**
    * Sets the DOI for this work.
    * @param d the DOI, as a String (without the "doi:" prefix).  A null
    *        value is stored as-is and means "no DOI known".
    */
   public void setDoi ( String d ) {
      // Strings are immutable, so no defensive copy is needed (the former
      // "new String(d)" also threw a NullPointerException on null).
      doi = d;
      if ( DEBUG ) System.err.println(ME+"has set my DOI to " + d);
   }

   /**
    * Sets the URN for this work, replacing any URN it already had.
    * The URN is currently author+year+title (see synthesizeURN).
    * TBD: decide whether setting a URN when one is already present
    * should be rejected (e.g. by throwing an exception).
    * @param u the URN, as a String (without the "urn:" prefix).  A null
    *        value leaves the Creation without a URN, so that getURN()
    *        synthesizes one on its next call.
    */
   public void setUrn ( String u ){
      // Strings are immutable, so the value can be stored directly.
      myURN = u;
      hasURN = ( u != null );
      if ( DEBUG )
         System.err.println(ME+"setUrn to " + u );
   }

   /**
    * Sets the title for this work.  A title that has already been set is
    * never overwritten; instead the existing and the rejected title are
    * reported on System.err.
    * @param s the title, as a String
    */
   public void setTitle ( String s ) {
      if ( title != null ) {
         // report the title that was offered (s), not the old one twice
         System.err.println(ME+"in setTitle, old title:\n"+title
            +"\nnew title:\n" + s);
      } else title = s;
   }

   /**
    * addAuthor - appends this Author object to the list of authors for
    * this work, unless an equal Author is already in the list.
    * Does not update the Author database.
    * @param a the Author to be added.  A null value is ignored.
    */
   public void addAuthor ( Author a ) {
      if ( a == null ) {
         if (DEBUG)
            System.err.println(ME+"addAuthor given a null author");
         return;
      }
      if ( authors == null ) {
         authors = new Author[1];
         authors[0] = a;
         return;
      }
      // there exist some authors already
      for ( int i = 0; i < authors.length; i ++ ) {
         if ( authors[i] != null && authors[i].equals(a) ) return;
      }
      // none of the current authors matches Author "a": grow the array by
      // one.  A new array is always allocated, so an array handed in via
      // setAuthors is never modified here.
      Author[] grown = new Author[authors.length+1];
      System.arraycopy ( authors, 0, grown, 0, authors.length );
      grown[authors.length] = a;
      authors = grown;
   }

   /**
    * Replaces the entire author list.
    * @param alist the array of Author objects to be stored in this
    * Creation.  The array is kept by reference, not copied, so code that
    * still holds the array can change the author list behind the
    * Creation's back.  When alist is null, a one-element list holding
    * an Author built from "Unknown" is stored instead.
    */
   public void setAuthors ( Author[] alist ) {
      if ( alist == null ) {
         if (DEBUG) {
            System.err.println(ME+"setAuthors given null alist");
         }
         // because OAMS requires author
         Author[] placeholder = new Author[1];
         placeholder[0] = new Author("Unknown");
         authors = placeholder;
         return;
      }

      // Warning: elements of the array are mutable
      authors = alist;
      if (DEBUG) {
         System.err.println(ME+"has set " + alist.length + " authors");
      }
   }

   /**
    * Defines the publication date for this Creation object.  The value
    * is passed through MetaData.toOAMS before it is stored.
    * @param s The date, in yyyy-mm-dd format.
    */
   public void setDate ( String s ) {
      // Make sure s is in oams format (ccyy-mm-dd)
      // TBD: OAMS is dead.  Use DC format?  Allow empty mm, dd?
      String normalized = MetaData.toOAMS (s);
      date = normalized;
      if (DEBUG) {
         System.err.println(ME+"set date to " + date + " for creation "
            + "with URN <" + myURN + ">");
      }
   }

   // accessor functions for fields in the Creation object

   /**
    * get URN - returns the URN for this work.  If no URN has been set
    * yet, one is synthesized from the first author's last name, the date
    * and the title (see synthesizeURN), stored, and returned; later calls
    * return that same stored value.
    * @return the string of the URN, freshly synthesized if necessary
    */
   public String getURN() {
      if ( hasURN ) return myURN;
      // An unknown first author is passed as null, which synthesizeURN
      // turns into a wildcard.
      String a = null;
      if ( authors != null && authors.length > 0 && authors[0] != null )
         a = authors[0].getLastName();
      // Store the trimmed form so that this call and all later calls
      // return exactly the same string.
      myURN = Creation.synthesizeURN( a, date, title ).trim();
      hasURN = true;
      return myURN;
   }
   /** @return the title of this work, or null if none has been set */
   public String getTitle() {
      return this.title;
   }
   /** @return the stored date of this work; "" until a date has been set */
   public String getDate() {
      return this.date;
   }
   /**
    * @return the internal author array itself (not a copy), or null if
    * no authors have been stored
    */
   public Author[] getAuthors() {
      return this.authors;
   }
   /**
    * Returns the last name of the first author of this work.
    * @return the first author's last name, or "" when no author is
    * known.  (synthesizeURN treats "" as a missing author.)
    */
   public String getFirstAuthorLastName() {
      // Formerly threw on a null or empty author list.
      if ( authors == null || authors.length == 0 || authors[0] == null )
         return "";
      Author a = authors[0];
      return a.getLastName();
   }

   /**
    * synthesizeURN - this is THE single place in the entire system
    * where document ID's are synthesized.  The result is the lower-cased
    * concatenation of: up to 10 characters of the author ("*" if
    * missing), the 4-digit year ("*" if missing or malformed), and up to
    * 20 characters of the title (nothing if missing).
    *
    * @param author is the first author's last name
    * @param date is the date in oams format: yyyy-mm-dd or just the year
    * @param title is the String containing the complete title
    * @return String containing the synthesized URN
    *
    * NOTE: recall that the second argument to substring is one beyond
    * the index of the last character to be copied.
    */
   public static String synthesizeURN
   (String author, String date, String title) {
      StringBuffer urn = new StringBuffer();

      if ( author == null || author.equals("") ) urn.append("*");
      else urn.append(author.substring(0,Math.min(10,author.length())));

      // Strings must be compared with equals(), not ==
      if ( date == null || date.equals("") ) urn.append("*");
      else if ( date.length() >= 4 &&          // guard the charAt calls
                Character.isDigit(date.charAt(0)) &&
                Character.isDigit(date.charAt(1)) &&
                Character.isDigit(date.charAt(2)) &&
                Character.isDigit(date.charAt(3)) )
         urn.append(date.substring(0,4));
      else { // not OAMS format
         System.err.println(ME+"in synthesizeURN has bad date <"+date+">");
         urn.append("*");
      }

      if ( title != null && !title.equals("") )
         urn.append(title.substring(0,Math.min(20,title.length())));

      String result = (urn.toString()).toLowerCase();
      if (DEBUG)
         System.err.println(ME+"synthesized a URN: " + result);
      return result;
   }

   /**
    * get URLs - returns every URL currently known for this work, i.e.
    * the contents of the display ID list.
    * (Future) find more URLs, too: map the work's metadata to a URN, feed
    * the URN to a name handler or search engine, and return the URLs
    * that come back as well.
    * @return null if there are no URLs, otherwise an array of strings
    * that contains all the known URLs for this work.
    */
   public String[] getURLs() {
      boolean noneKnown = ( displayID == null ) || displayID.isEmpty();
      if ( noneKnown ) {
         return null;
      }
      // convert the vector of URLs into an array
      String[] urls = new String[displayID.size()];
      return (String[]) displayID.toArray(urls);
   }

   /** @return the DOI stored for this work, or null if none has been set */
   public String getDoi() {
      return this.doi;
   }

   // equals -
   /**
    * Returns true if the specified creation is consistent with this one.
    * Missing fields are filled in (on both objects) as a side effect.
    *
    * A field is compared only when it is known on both sides; a null or
    * empty date/title, or a null author list, carries no information and
    * neither confirms nor refutes a match.  Dates are compared on their
    * first four characters (the year), ignoring case.  The creations are
    * consistent when no compared date or title differs and at least two
    * things agree (date, title, and each author of this creation found
    * among the other's authors each count once).
    *
    * NOTE: this overloads, and does not override, Object.equals(Object),
    * so collection classes do not call it.
    *
    * @param c the Creation object to be matched; null never matches
    * @return true if the Creations were the same or consistent
    **/
   public boolean equals ( Creation c ) {
      if ( c == null ) return false;
      if ( DEBUG )
         System.err.println(ME+"in equals with \n" + c.toString()
            + "\n vs: \n" + toString() );
      int matches = 0;

      // year: date defaults to "", so "" must count as unknown, too
      String cDate = c.getDate();
      if ( date != null && !date.equals("")
           && cDate != null && !cDate.equals("") ) {
         if ( !date.regionMatches(true,0,cDate,0,4 ) ) return false;
         matches ++;
      }

      // title
      String cTitle = c.getTitle();
      if ( title != null && !title.equals("")
           && cTitle != null && !cTitle.equals("") ) {
         if ( ! title.equals ( cTitle ) ) return false;
         matches ++;
      }

      // authors: count each of our authors that also appears in c
      Author[] cAuthors = c.getAuthors();
      if ( authors != null && cAuthors != null ) {
         for (int i = 0; i < authors.length; i++ ) {
            if ( authors[i] == null ) continue;
            for ( int j = 0; j < cAuthors.length; j++ ) {
               if ( cAuthors[j] != null && authors[i].equals(cAuthors[j]) ) {
                  matches ++;
                  break;
               }
            }
         }
      }

      if ( matches < 2 ) return false;
      // Looks like we have a match.  Complete missing fields.
      return merge ( c );
   }

   // extendUrn -
   /**
    * Extends this work's URN by one character ("a") so that it can be
    * made unique, and counts the extension in iExtended.  If the work has
    * no URN yet, one is obtained from getURN() first, so the extension is
    * applied to a real URN and is not lost on a later getURN() call.
    * (Original intent: append another title word; not implemented.)
    * @return always true
    **/
   public boolean extendUrn() {
      myURN = getURN() + "a";   // well, maybe not a title word
      this.iExtended++;
      return true;
   }

   // reconstruct -
   /**
     * Reconstructs a Creation object from XML data stored in files under
     * CONFIG.REPOSITORY/&lt;doi&gt;/Surrogate.
     * @param doi String that is the "DOI" for the surrogate who knows
     *        about this Creation.  If non-empty, the file "myData" is read
     *        and the title, date, DOI, http identifiers and creators found
     *        in it are copied into the new Creation.  If empty, the file
     *        "refList" is read and handed to Reference.reconstruct; the
     *        returned Creation then carries only the urn.
     * @param urn the Urn that is requesting this reconstruction (may be
     *        incomplete); it becomes the URN of the returned Creation
     * @return a Creation object
     * NOTE: it is guaranteed on entry that the Surrogate exists
     */

   public static Creation reconstruct ( String doi, String urn ) {

      // A non-empty doi means we are reconstructing a Surrogate creation
      boolean isSurrogate = !doi.equals("");

      // Directory containing the surrogate belonging to this DOI
      String filename = CONFIG.REPOSITORY+File.separator
                        +doi+File.separator+"Surrogate";

      // Read in the data from the appropriate file
      String fileData = MetaData.getData
            ( filename + File.separator + (isSurrogate ? "myData" : "refList") );
      Document doc = MetaData.getDOM ( fileData );
      System.err.println(ME+"has read in an XML tree");

      // Gin up a new Creation and fill in its fields from "doc"
      Creation c = new Creation ( urn );

      if ( isSurrogate ) {
         c.setTitle ( MetaData.getValue(doc,"dc:title"));
         c.setDate  ( MetaData.getValue(doc,"dc:date"));
         c.setDoi (doi);

         // getValues can evidently return null (see the creator check
         // below), so guard the identifiers the same way
         String[] ids = MetaData.getValues(doc,"dc:identifier");
         if ( ids != null ) {
            for ( int i = 0; i<ids.length; i++ ) {
               // NOTE(review): this drops the leading "http:" from the
               // identifier, whereas doWork passes the identifier through
               // unchanged - confirm which form the stored data needs.
               if ( ids[i] != null && ids[i].startsWith("http:") )
                  c.setDisplayID(ids[i].substring(5));
            }
         }

         String[] anames = MetaData.getValues(doc,"dc:creator");
         if ( anames != null ) {
            for ( int i = 0; i < anames.length; i++ ) {
               Author a = new Author ( anames[i] );
               if ( !AuthorDatabase.isInDatabase(a) )
                  AuthorDatabase.stashAuthor(a);
               c.addAuthor ( AuthorDatabase.fetchSpecificAuthor( a ) );
            }
         }
      } else {
         System.err.println(ME+"in reconstruct about to rebuild reference ");
         // The Reference that comes back is not used here.
         Reference.reconstruct (doc);
      }

      return c;
   }

   // doWork -
   /** Reconstructs a Creation out of XML DOM tree with root &lt;work&gt;.
     * Only the element children of the root are examined: dc:title sets
     * the title (newlines turned into blanks, then trimmed);
     * dc:identifier sets the URN, DOI or a display ID depending on its
     * "urn:", "doi:" or "http:" prefix; dc:creator adds an author;
     * dc:date sets the date.  Elements without a first child are skipped.
     * @param work The root of the &lt;work&gt; subtree.
     * @return a new Creation object for this work
     */
   protected static Creation doWork ( Node work ) {

      if (DEBUG) {
         System.err.println(ME+
            "turning a <work> element into a Creation object");
      }
      Creation rebuilt = new Creation();

      NodeList kids = work.getChildNodes();
      for ( int idx = 0; idx < kids.getLength(); idx++ ) {
         Node kid = kids.item(idx);
         short kind = kid.getNodeType();

         // attribute and text nodes carry nothing we need
         if ( kind == Node.ATTRIBUTE_NODE || kind == Node.TEXT_NODE ) {
            continue;
         }
         if ( kind != Node.ELEMENT_NODE ) {
            System.err.println("Unknown node type");
            continue;
         }

         String tag = kid.getNodeName();
         Node textNode = kid.getFirstChild();  // text node
         if ( textNode == null ) {
            continue;
         }

         if ( tag.equals("dc:title") ) {
            String raw = textNode.getNodeValue();
            rebuilt.setTitle ( raw.replace('\n',' ').trim() );
         } else if ( tag.equals("dc:identifier") ) {
            String text = textNode.getNodeValue().trim();
            if ( text.startsWith("urn:") ) {
               rebuilt.setUrn ( text.substring(4) );
            } else if ( text.startsWith("doi:") ) {
               rebuilt.setDoi ( text.substring(4) );
            } else if ( text.startsWith("http:") ) {
               rebuilt.setDisplayID ( text );
            }
         } else if ( tag.equals("dc:creator") ) {
            rebuilt.addAuthor ( Author.reconstruct (kid) );
         } else if ( tag.equals("dc:date") ) {
            rebuilt.setDate ( textNode.getNodeValue() );
         }

         if (DEBUG) {
            System.err.println(ME+"has rebuilt " + tag +
               " element node with value " + textNode.getNodeValue());
         }
      }

      System.err.println(ME+"doWork has reconstructed the following creation:\n"
         + rebuilt.toXML(""));
      return rebuilt;
   }

   // toXML
   /**
    * Returns the DublinCore XML string for this creation: a &lt;work&gt;
    * element (declaring the dc namespace) that wraps whatever GenXML.dc
    * produces for the title, date, URN, display IDs, DOI and authors.
    * If this Creation corresponds to an archive item that is being
    * analyzed, then its one known URL should have been stored as the
    * display ID.  For a Reference or a Citation it is proper to have no
    * display ID, and likewise no DOI if there is no surrogate.
    * The URN is used to look up URLs.
    * @param pad A string containing zero or more blanks to be used as a pad
    * @return a String representing the XML for this Creation (work) object
    */
   public String toXML ( String pad ) {
      final String innerPad = pad + "   ";
      final String openTag  = pad + "<work xmlns:dc = \"http://purl.org/DC\">\n";
      final String closeTag = pad + "</work>\n";
      return openTag
             + GenXML.dc(title, date, myURN, displayID, doi, authors, innerPad)
             + closeTag;
   }

   /**
    * Like toXML(String), but the &lt;work&gt; element carries an "id"
    * attribute instead of the namespace declaration, and the content is
    * generated with the pad unchanged.
    * @param pad blanks placed before the &lt;work&gt; tags and handed to GenXML.dc
    * @param id the id, which could be a repository name and id; it is
    *        inserted into the attribute exactly as given
    * @return a String representing the XML for this Creation (work) object
    */
   public String toXML ( String pad, String id ) {
      final String openTag  = pad + "<work id=\"" + id + "\">\n";
      final String closeTag = pad + "</work>\n";
      return openTag
             + GenXML.dc(title, date, myURN, displayID, doi, authors, pad)
             + closeTag;
   }

   /**
    * Returns the same text as toXML called with an empty pad.
    * @return the XML form of this Creation
    */
   public String toString() {
      String xml = this.toXML ( "" );
      return xml;
   }

   // PROTECTED FUNCTIONS
   /**
    * Called by Reference to update the fileIndex hashtable in Creation
    * Database.  Conditions of call: Surrogate has just written itself
    * out to storage, including the Reference object using this Creation.
    * The URN is obtained through getURN(), so a Creation that has not
    * been given a URN yet gets a synthesized one instead of failing.
    * @param doi the DOI handed on to CreationDatabase.save together with
    *        this work's trimmed URN
    */
   protected void save ( String doi ) {
      CreationDatabase.save( getURN().trim(), doi );
   }

   /**
    * fetchURN - given an author, date (year), and title, return
    * zero or more Creations from the database.  The routine is
    * here because here is where the synthetic URN is defined,
    * along with wildcard characters.  Four keys are looked up: the
    * full URN, the URN without the date, the URN without the author,
    * and the URN with the title only.  A Creation found under more than
    * one key is returned once.
    * @param author is the first author's last name
    * @param date is the date in oams format: yyyy-mm-dd or just the year
    * @param title is the String containing the complete title
    * @return a vector of Creations whose keys match the given URN
    */
   protected static Vector fetchURN (String author, String date, String title) {
      Vector result = new Vector();

      // Vector.contains uses Object.equals (identity for Creation), so
      // the side-effecting Creation.equals(Creation) is not triggered.
      Creation c = CreationDatabase.fetchCreation (
                   Creation.synthesizeURN (author,date,title) );
      if ( c != null && !result.contains(c) ) result.addElement(c);

      c = CreationDatabase.fetchCreation (
                   Creation.synthesizeURN (author,"",title) );
      if ( c != null && !result.contains(c) ) result.addElement(c);

      c = CreationDatabase.fetchCreation (
                   Creation.synthesizeURN ("",date,title) );
      if ( c != null && !result.contains(c) ) result.addElement(c);

      c = CreationDatabase.fetchCreation (
                   Creation.synthesizeURN ("","",title) );
      if ( c != null && !result.contains(c) ) result.addElement(c);

      return result;
   }

   /**
    * fetchURN returns a vector of creations that might match this name.
    * Besides the URN itself, the variants with the author, the year, or
    * both replaced by the "*" wildcard are looked up.  The URN is taken
    * apart according to the layout produced by synthesizeURN:
    * author-or-"*", then 4-digit-year-or-"*", then the title.  If the
    * URN does not follow that layout only the exact lookup is made.
    * A Creation found under more than one key is returned once.
    * @param urn String that is a synthetic URN; null yields an empty result
    * @return a vector of Creations whose keys match the given URN
    */
   protected static Vector fetchURN ( String urn ) {
      Vector result = new Vector();
      if ( urn == null ) return result;

      Creation c = CreationDatabase.fetchCreation ( urn );
      if ( c != null ) result.addElement(c);

      // Determine start of year (j) and of title (k); author starts at 0
      int j = 1;
      if ( !urn.startsWith("*") ) {
         // the author runs up to the first digit (a year) or "*" (no year)
         while ( j < urn.length() && urn.charAt(j) != '*'
                 && !Character.isDigit( urn.charAt(j) ) ) j++;
      }
      if ( j >= urn.length() ) return result;   // nothing after the author

      int k;
      if ( urn.charAt(j) == '*' ) {
         k = j+1;                               // wildcard year
      } else {
         k = j;
         while ( k < urn.length() && k < j+4
                 && Character.isDigit ( urn.charAt(k)) ) k++;
         if ( k != j+4 ) return result;         // no 4-digit year: malformed
      }

      // Vector.contains uses Object.equals (identity for Creation), so
      // the side-effecting Creation.equals(Creation) is not triggered.

      // Get creations who are only missing a author
      c = CreationDatabase.fetchCreation ( "*"+urn.substring(j) );
      if ( c != null && !result.contains(c) ) result.addElement(c);

      // Get creation which is only missing the year
      c = CreationDatabase.fetchCreation
            ( urn.substring(0,j) + "*" + urn.substring(k) );
      if ( c != null && !result.contains(c) ) result.addElement(c);

      // Get creation which has only the title
      c = CreationDatabase.fetchCreation ("**"+urn.substring(k));
      if ( c != null && !result.contains(c) ) result.addElement(c);

      return result;
   }



   // PRIVATE FUNCTIONS

   /** Merge this creation with the given one.  Only the fields of the
    * Creation objects are changed.  The objects themselves do not change.
    * For each of date, title and authors: if exactly one of the two
    * Creations knows the value, it is copied to the other one.  A null
    * or empty date/title and a null or empty author list count as
    * unknown (date is initialized to "", so null alone is not enough).
    * Values known on both sides are left untouched.
    * @param c the Creation to merge with
    * @return always true
    */
   private boolean merge ( Creation c ) {
      System.err.println (ME + "in MERGE CREATIONS");

      // merge date
      String cDate = c.getDate();
      boolean haveDate  = ( date != null && !date.equals("") );
      boolean cHasDate  = ( cDate != null && !cDate.equals("") );
      if ( haveDate && !cHasDate ) {
         c.setDate(date);
      }
      else if ( !haveDate && cHasDate ) {
         date = cDate;
      }

      // merge title
      String cTitle = c.getTitle();
      boolean haveTitle = ( title != null && !title.equals("") );
      boolean cHasTitle = ( cTitle != null && !cTitle.equals("") );
      if ( haveTitle && !cHasTitle ) {
         // assigned directly: setTitle refuses to replace a non-null
         // (even empty) title
         c.title = title;
      }
      else if ( !haveTitle && cHasTitle ) {
         title = cTitle;
      }

      // merge authors (the array is shared by reference, as in setAuthors)
      Author[] cAuthors = c.getAuthors();
      boolean haveAuthors = ( authors != null && authors.length > 0 );
      boolean cHasAuthors = ( cAuthors != null && cAuthors.length > 0 );
      if ( haveAuthors && !cHasAuthors ) {
         c.setAuthors(authors);
      }
      else if ( !haveAuthors && cHasAuthors ) {
         this.setAuthors(cAuthors);
      }

      return true;
   }

}
