Surrogate.java

The Surrogate class is the main class in the Linkable.API package. Construction of a Surrogate analyzes an archive Item to extract its metadata and references. Package class names are in red and uses of them are in blue. Uses of related package class names and methods are in green.

The Linkable.API package uses two other related packages: Linkable.Analysis, which does all the work of parsing items in an archive, and Linkable.Utility, which manages auxiliary databases.


// API for linking digital objects (specifically, creations)
package Linkable.API;

import java.util.Vector;
import Linkable.Analysis.*;
import Linkable.Utility.*;

// Surrogate stands in for one archive item.  Constructing a Surrogate
// obtains a RefLinkAnalyzer for the item, fills the fields below from the
// analyzer's results, and stashes the item's Creation in CreationDatabase.
// Most of the public query methods are still stubs that return null.
public class Surrogate {

   private static final String ME = "Surrogate: "; // prefix for diagnostic output

   private BibData myData;         // URN and metadata for this item
   private Reference[] refList;    // the references in this item
   private Vector knownCitations;  // Growing list of citations
   private String myURL;           // Network address of our item
   private MIMEfile localMetaData; // Original text fragments in this
                                   // item corresponding to title, etc.

   // Constructor - make a surrogate for the item at this network address.
   // Delegates to the two-argument constructor, using url as both the
   // local and the network address.
   public Surrogate(String url) {
      this ( url, url );
   }

   // Constructor - make a surrogate for the item at the local address
   // specified by the first string, with the network address in the
   // second string (needed for processing local copies of archives).
   // Prints a diagnostic line to System.out and, as its last step, stashes
   // the item's Creation in CreationDatabase.
   public Surrogate ( String localURL, String netURL ) {
      System.out.println(ME+"making Surrogate for item "+localURL);
      myURL = netURL;

      // Get an analyzer specific for this kind of item.
      // NOTE(review): getRefLinkAnalyzer hands `this` to the analyzer before
      // myData, refList, knownCitations and localMetaData are assigned, so
      // the order of the statements below should not be changed casually.
      RefLinkAnalyzer a = getRefLinkAnalyzer( localURL );
  
      // Has our creation been seen before?
      // NOTE(review): the BibData constructor visible in this file takes a
      // String; confirm that DID is compatible with it.
      DID docID =  a.getMetaData();
      myData = new BibData ( docID );

      // Use the Analyzer to construct the remaining fields
      localMetaData = a.buildLocalMetaData( myData );
      refList = a.buildRefList( myData );
      knownCitations = a.buildCitationList( docID );
      
      // All fields in BibData should now have been filled in by "cooked data"
      // Add this to or merge this with the database
      CreationDatabase.stashCreation ( myData.getCreation() );
   }

   // This routine is intended to return an XML file which
   // includes the original contents of the Item plus reference links
   // as XLINKs (future: openLinks?).
   // Not yet implemented: currently always returns null.
   public byte[] getLinkedText () { return null; }

   // getReferenceList is intended to return this item's references, in both
   // original and in canonical form with the Santa Fe 8 fields.
   // Not yet implemented: currently always returns null.
   public byte[] getReferenceList(){ return null; }

   // getCurrentCitationList is intended to return the list of known
   // citations of this document.
   // Not yet implemented: currently always returns null.
   public byte[] getCurrentCitationList () { return null; }

   // getMyData is intended to return the bibliographic data for this item,
   // which includes this item's URN and OAMS metadata.
   // Not yet implemented: currently always returns null.
   public byte[] getMyData() { return null; }

   // getRefID implements Carl's "Is this one of your references?" question
   // as well as his "How do you reference this?" question.
   // If the answer is no, getRefID returns null.
   // Not yet implemented: currently returns null for every argument.
   public byte[] getRefID ( BibData b ) { return null; }

   // getCitationID implements Carl's "am I one of your citations?" question.
   // If the answer is no, or unknown, getCitationID returns null.
   // Not yet implemented: currently returns null for every argument.
   public byte[] getCitationID ( BibData b ) { return null; }

   // getRelatedPapers is a placeholder for methods that return
   // co-cited, co-referenced papers of this one.
   // Currently always returns null.
   public BibData[] getRelatedPapers() { return null; }

   // TBD - for now the result is just a single newline character.
   public String toString() {
      return "\n" +
             "";
   }

   // UTILITY METHODS

   // Given a Reference, construct from it a new Citation and return it.
   // The CiteEnum type should be REFERENCE because we found the citation
   // among the references belonging to another work.
   // Both the Reference object and the new Citation object correspond to
   // the same work.
   protected static Citation buildCitation ( Reference r ) { 
      return new Citation ( r.getDocID(), r.getContexts(), CiteEnum.REFERENCE );
   }

   // Add this Citation to our knownCitations.
   // Not yet implemented: the body is empty, so the citation is ignored.
   protected void addCitation ( Citation c ) { }

   // PRIVATE METHODS

   // Construct an analyzer for the item, depending on its MIME type.
   // For now localURL is not examined and an HTMLAnalyzer is always
   // returned; the analyzer is given a pointer back to this Surrogate.
   private RefLinkAnalyzer getRefLinkAnalyzer ( String localURL ) {
      System.out.println(ME+"in getRefLinkAnalyzer...");
      // if MIME type is HTML
      return new HTMLAnalyzer ( this );
   }

   // Use the MetaData to look this item up in the creation database.
   // Return the associated document id.  Add doc to database as needed.
   // Not yet implemented: prints a diagnostic and returns null.
   // NOTE(review): not called from anywhere in this class.
   private String databaseLookup( MIMEfile mf ) {
      System.out.println(ME+"in databaseLookup...");
      return null;
   }

   // Given the id of the document, construct the BibData item.
   // Not yet implemented: prints a diagnostic and returns null.
   // NOTE(review): not called from anywhere in this class.
   private BibData buildBibData ( String docID ) {
      System.out.println(ME+"in buildBibData...");
      return null;
   }

}
// BibData for this item can be used to get the associated creation
package Linkable.API;

import Linkable.Utility.*;            // for database lookup

/**
 * Handle through which the Creation belonging to an archive item is
 * fetched.  Only the pointer handed to the constructor is kept; the class
 * offers no setters.
 */
public class BibData {
   // key passed to CreationDatabase.fetchCreation
   private String creationPointer;

   /**
    * Wraps a creation pointer.  The value is stored as given, without
    * validation.
    */
   public BibData ( String cp ) {
      this.creationPointer = cp;
   }

   /**
    * Looks up the creation for this item in the creation database.
    * @return the result of CreationDatabase.fetchCreation for the stored
    *         creation pointer
    */
   public Creation getCreation() {
      Creation found = CreationDatabase.fetchCreation( creationPointer );
      return found;
   }

   /**
    * @return the creation pointer as text ("null" if the pointer is null)
    */
   public String toString() {
      return String.valueOf( creationPointer );
   }
}

// Reference encapsulates data about a reference contained in a Creation

package Linkable.API;

/**
 * One entry in a creation's reference list: the bibliographic data of the
 * referenced work, its position in the list, the reference text as it
 * appeared, and the context(s) in which it was cited.
 */
public class Reference {
   private BibData referenceData;  // bibliographic data of the referenced work
   private int ordinalNumber;      // position of this reference, 1..N
   private String origRef;         // the reference as written in the text
   private String context[];       // private copy of the citing context(s)

   /**
    * Builds a Reference.  The BibData is shared rather than copied (it has
    * no set methods); the context array is copied so later changes to the
    * caller's array are not seen here.
    */
   public Reference ( BibData bd, int n, String ref, String[] c ){
      this.referenceData = bd;
      this.ordinalNumber = n;
      this.origRef = ref;
      this.context = (String[]) c.clone();
   }

   /** returns the BibData (i.e. doc id) of this reference */
   public BibData getDocID() {
      return referenceData;
   }

   /** returns a fresh copy of the contexts in which this reference occurred */
   public String[] getContexts () {
      return (String[]) context.clone();
   }

   /** Multi-line dump: doc id, ordinal number, original text, contexts. */
   public String toString() {
      StringBuilder out = new StringBuilder();
      out.append("\n");
      out.append("  ").append(referenceData.toString()).append("\n");
      out.append("  ").append("").append(ordinalNumber).append("\n");
      out.append("  ").append("").append(origRef).append("\n");
      out.append("  ").append("\n");
      out.append(listContexts("    "));
      out.append("  ").append("\n");
      out.append("");
      return out.toString();
   }

   // PRIVATE METHODS

   // One line per context, each prefixed with pad and ended with a newline.
   private String listContexts(String pad) {
      StringBuilder lines = new StringBuilder();
      int count = context.length;
      int k = 0;
      while ( k < count ) {
         lines.append(pad).append("").append(context[k]).append("\n");
         k++;
      }
      return lines.toString();
   }

}

// A MIME file has a text file and a MIME type.  It is suitable for
// holding text/xml among other things.  Like Strings, MIMEfiles are
// immutable.  The only way to change it is to construct a new one
// out of an old one.

package Linkable.API;

/**
 * An immutable pairing of a MIME type with the bytes of a file.  The
 * constructor copies the bytes it is given and getFile() hands out a copy,
 * so the stored content cannot be changed from outside.
 */
public class MIMEfile {
   private final String MIMEtype;
   private final byte[] file;

   /**
    * Constructor.
    * @param m the MIME type, stored as given
    * @param f the file content; copied, so later changes to f are not seen
    * @throws NullPointerException if f is null
    */
   public MIMEfile ( String m, byte[] f ) {
      MIMEtype = m;
      file = new byte[f.length];
      System.arraycopy(f,0,file,0,f.length);
   }

   /** return the MIME type of this MIMEfile */
   public String getMIMEtype(){
      return MIMEtype;
   }

   /**
    * return a copy of the content of this MIMEfile
    * @return a new array holding the same bytes as the stored content
    */
   public byte[] getFile(){
      byte[] result = new byte[file.length];
      // arraycopy takes (source, srcPos, destination, destPos, length).
      // The stored bytes are the source and the fresh array the destination;
      // the reverse order would zero the stored bytes and return zeros.
      System.arraycopy( file, 0, result, 0, file.length );
      return result;
   }

   /**
    * Text dump: the MIME type, then the content decoded with the platform
    * default charset.
    */
   public String toString() {
      return "\n" +
             "  " + MIMEtype + "\n" +
             "  " + new String(file) + "\n" +
             "";
   }

}

// Citation is a class which contains information about a creation known
// to have cited this document.  It contains as much information as can
// be obtained.  The context(s) are stored only for the CiteEnum values
// REFERENCE and RI (ResearchIndex).  REFERENCE means that the citation was
// discovered while processing an item in an open archive.

package Linkable.API;
public class Citation {
   private BibData citationData;    // URI and XML of the citing creation
   private String context[];        // context in the citing document
   private int how;                 // how this item was cited

   /** Constructor
    *@param BibData for the Citation
    *@param The contextx in which this item was cited
    *@param The citation type
    **/
   public Citation ( BibData bd, String[] c, int h ) {
      citationData = bd;
      context = new String [ c.length ];
      System.arraycopy(c,0,context,0,c.length);
      how = h;
   }

   /**
    * Accessor function for citation contexts
    * @returns String[] array of context strings
    **/
   public String[] getContext() {  // return all the contexts of this citation
      String[] result=new String[context.length];
      System.arraycopy (context, 0, result, 0, context.length);
      return result;
   }

   public String toString() {
      return "\n" +
             "  " + citationData.toString() + "\n" +
             "  " + "\n" +
             listContexts("    ") + 
             "  " + "\n" +
             "  " + listHow() + "\n" +
             "";
   }

   // PRIVATE METHODS

   private String listContexts(String pad) {
      String result = "";
      for ( int i=0; i < context.length; i++ )
         result = result + pad + "" + context[i] + "\n";
      return result;
   }

   private String listHow ( ) {
      if ( how == CiteEnum.REFERENCE ) return  "REFERENCE";
      else if ( how == CiteEnum.SCI) return  "From Citation Index";
      else if ( how == CiteEnum.RI ) return "From Research Index";
      else return "unknown";
   }
      
}

// CiteEnum is an enumeration of how we got this citation
package Linkable.API;
/** Integer codes recording how a citation was discovered. */
public class CiteEnum {

   /** Found as a reference in an analyzed paper. */
   public static final int REFERENCE = 1;

   /** Found in the SCI. */
   public static final int SCI = 2;

   /** Obtained from CiteSeer. */
   public static final int RI = 3;

}

// Creation is the generic class containing information about a creation
// The fields are filled in on a best-effort basis.
package Linkable.API;
import Linkable.Utility.Author;
public class Creation {
   public static final int DOI = 1; // URN is DOI
   public static final int SYN = 2; // URN is synthesized from bibliographic data
   private String myURN;            // URI for this creation
   private int urnType;             // DOI? Synthetic?
   private String title;
   private AUTHOR[] authors;
   private String date;             // creation date (NOT oams:accession date)
   // Constructor returns an empty Creation object
   public Creation(){}
   // accessor functions for populating the BibData object
   public void setUrn ( String u ){
      myURN = u; 
   }
   // Functions to set various fields of the Creation object
   // TBD
   // Accessor methods for the various fields of the Creation object
   // TBD

   // Return the XML string for this creation
   public String toXML ( String pad ) {
      return pad + "\n" +
             "//TBD" +
             pad + "";
   }

   public String toString() {
     return "\n" +
            "  " + "myURN\n" +
            "  " + text.toString() + "\n" +
            "";
   }
}

Donna Bergmark, 2000-04-24, DRAFT 2