package uk.ac.soton.harvester;
import java.util.*;
import java.io.*;

/**
 *
 * CitationOutput is the superclass of each class that
 * puts the citation data onto the specified printwriter
 * in the one specific format requested by doTXT, doHTML or doXML.
 * The concrete subclasses are TXTOutput, HTMLOutput and XMLOutput.
 **/
abstract class CitationOutput {

    /**
     *
     * The citation attribute strings are filled in by the prepare method
     * from the data in the attribute markers stored in the deciter state.
     * djb - added url to the list of strings
     *
    **/
    // Every attribute string starts empty except pages, which starts as "?".
    // prepare() assigns "" to pages before it extracts the current citation.
    String author="", date="", tit="", pages="?", pub="", vol="", iss="", publish="", place="", misc="", xxxid="", rest="", url="";


    /**
     *
     * State of the deciter, including the attribute markers used
     * to define the output strings. It remains null after the empty
     * constructor until setDeciterState is called.
     *
    **/
    DeciterState ds;



    /**
     *
     * Standard constructor that simply stores the deciter state.
     * All other processing is on a per-citation basis.
     *
     * @param ds the current deciter state
     *
    **/
    CitationOutput(DeciterState ds) {
        // Route through the setter so both construction paths store the state
        // in the same way.
        // NOTE(review): setDeciterState is public and non-final, so a subclass
        // override would run before that subclass is initialised -- confirm
        // that no subclass overrides it.
        this.setDeciterState(ds);
    }


    /**
     *
     * Empty constructor to make an uninitialised object easily from the
     * dynamic loading code. Must be used in conjunction with the setDeciterState method.
     *
     * It is a logic error to use the empty constructor and then to fail to
     * initialise the CitationOutput object with the setDeciterState method.
     *
    **/
    CitationOutput() {
        // Nothing to initialise here: ds stays null until setDeciterState is called.
        super();
    }


    /**
     * Stores the deciter state that prepare and splitPages read from.
     *
     * It is a logic error to use the empty constructor and then to fail to
     * initialise the CitationOutput object with the setDeciterState method.
     *
     * @param ds the current deciter state
     */
    public void setDeciterState(DeciterState ds) {
        // Plain assignment with no validation: a null argument is stored as-is,
        // and prepare() will then fail when it dereferences the state.
        this.ds = ds;
    }

    /**
     *
     * prepare is a method used by the subclasses 'output' methods to prepare
     * the attribute strings from the attribute markers in the deciter state.
     * It is a logic error to use the empty constructor and then to fail to
     * initialise the CitationOutput object with the setDeciterState method.
     *
     *
    **/
    void prepare(){
        // Fills the attribute strings (author, date, tit, ...) from ds.line using
        // the begin/end markers held in ds. A begin marker below zero means the
        // section was not recognised, and its string is left empty.
        String line=ds.line;
        int maxi=line.length()-1;

        Utils.DEBUG("M");

        // Highest end marker seen so far; everything after it becomes 'rest'.
        int lasti=0;

        // Clear every attribute string so that nothing leaks over from the
        // previous citation. url is part of the reset: without it a citation
        // that has no URL would silently inherit the URL of the one before.
        author=date=tit=pages=pub=vol=iss=publish=place=misc=
            xxxid=rest=url="";

        if(ds.authb>=0)author=detag(line.substring(ds.authb,ds.authe+1).trim());
        Utils.DEBUG("M1 "+ds.authe);
        lasti=Math.max(lasti,ds.authe);

        if(ds.dateb>=0)date=detag(line.substring(ds.dateb,ds.datee+1));
        Utils.DEBUG("M2 "+ds.datee);
        lasti=Math.max(lasti,ds.datee);

        if(ds.titb>=0)tit=detag(line.substring(ds.titb,ds.tite+1)).trim();
        Utils.DEBUG("M3 "+ds.tite);
        lasti=Math.max(lasti,ds.tite);

        if(ds.volb>=0)vol=detag(line.substring(ds.volb, ds.vole+1));
        Utils.DEBUG("M4 "+ds.vole);
        lasti=Math.max(lasti,ds.vole);

        if(ds.issb>=0)iss=detag(line.substring(ds.issb, ds.isse+1));
        Utils.DEBUG("M5 "+ds.isse);
        lasti=Math.max(lasti,ds.isse);

        if(ds.pagb>=0)pages=detag(line.substring(ds.pagb, ds.page+1));
        Utils.DEBUG("M6 "+ds.page);
        lasti=Math.max(lasti,ds.page);

        // The publication is the only section that also requires end > begin.
        if(ds.pubb>=0 && ds.pube>ds.pubb)pub=detag(line.substring(ds.pubb, ds.pube+1));
        Utils.DEBUG("M7 "+ds.pube);
        lasti=Math.max(lasti,ds.pube);

        // The remaining sections are copied verbatim (no detag).
        if(ds.miscb>=0)misc=line.substring(ds.miscb, ds.misce+1);
        Utils.DEBUG("M8 "+ds.misce);
        lasti=Math.max(lasti,ds.misce);

        if(ds.xxxHint && ds.xxxb>=0)xxxid=line.substring(ds.xxxb, ds.xxxe+1);
        Utils.DEBUG("M9 "+ds.xxxe);
        lasti=Math.max(lasti,ds.xxxe);

        if(ds.publishb>=0)publish=line.substring(ds.publishb, ds.publishe+1);
        Utils.DEBUG("M10 "+ds.publishe);
        lasti=Math.max(lasti,ds.publishe);

        if(ds.placeb>=0)place=line.substring(ds.placeb, ds.placee+1);
        Utils.DEBUG("M11 "+ds.placee);
        lasti=Math.max(lasti,ds.placee);

        // Cap lasti so that rest always keeps at least the final character of
        // the line. For an empty line the cap yields -2, so the start index is
        // clamped to 0 to avoid a StringIndexOutOfBoundsException.
        if(maxi-1<lasti)lasti=maxi-1;
        rest=line.substring(Math.max(lasti+1,0));
        Utils.DEBUG("M12 '"+rest+"'");

        // added by djb to set the option url address
        // The url span does not influence rest, which has already been computed.
        if(ds.urlb>=0)url=line.substring(ds.urlb, ds.urle+1);
        Utils.DEBUG("M13 "+ds.urle);
        }

    /**
     *
     * output renders the citation onto an output file according
     * to some format.
     * @param out PrintWriter destination
     * @return rest of the line (the remainder after the last output section)
     *
    **/
    // No default rendering exists: the class is abstract and every concrete subclass supplies this method.
    abstract String output(PrintWriter out);


    /**
     *
     * pre allows the outputter to emit a header or preamble for the
     * article citations as a body. This default definition does nothing.
     *
     * @param out the PrintWriter output stream
     *
    **/
    void pre(PrintWriter out) {
        // Default: no preamble, nothing is written to out.
    }

    /**
     *
     * post allows the outputter to emit a footer or postamble for the
     * article citations as a body. This default definition does nothing.
     *
     * @param out the PrintWriter output stream
     *
    **/
    void post(PrintWriter out) {
        // Default: no postamble, nothing is written to out.
    }

    /**
     *
     * splitPages outputs the identified page range on the given
     * output stream. e.g. "37--49" may appear as
     * &lt;pages&gt;&lt;firstpage&gt;37&lt;/firstpage&gt;&lt;lastpage&gt;49&lt;/lastpage&gt;&lt;/pages&gt;
     *
     * @param pages the page range substring lifted directly from the citation
     * @param Output the output stream on which to output the data
    **/
    protected void splitPages(String pages, PrintWriter Output){
        final int len = (pages == null) ? 0 : pages.length();

        // Nothing to split: only the XML format emits an (empty) placeholder.
        if (len < 1) {
            if (ds.doXML) {
                Output.println("<pages><firstpage></firstpage><lastpage></lastpage></pages>");
            }
            return;
        }

        // First page: skip leading non-digits, then copy the first digit run.
        int pos = skipNonDigits(pages, 0, len);
        if (ds.doXML) {
            Output.print("<pages><firstpage>");
        } else if (ds.doTXT) {
            Output.print("\tStart=");
        }
        pos = printDigitRun(pages, pos, len, Output);
        if (ds.doXML) {
            Output.print("</firstpage>");
        } else if (ds.doTXT) {
            Output.print(" ");
        }

        // Last page: skip the separator, then copy the second digit run.
        pos = skipNonDigits(pages, pos, len);
        if (ds.doXML) {
            Output.print("<lastpage>");
        } else if (ds.doTXT) {
            Output.print("End=");
        }
        printDigitRun(pages, pos, len, Output);
        if (ds.doXML) {
            Output.println("</lastpage></pages>");
        } else if (ds.doTXT) {
            Output.println();
        }
    }

    /** Returns the index of the first digit at or after from, or limit if there is none. */
    private static int skipNonDigits(String text, int from, int limit) {
        int i = from;
        while (i < limit && !Character.isDigit(text.charAt(i))) {
            i++;
        }
        return i;
    }

    /** Prints the digit run starting at from, one character at a time, and returns the index just past it. */
    private static int printDigitRun(String text, int from, int limit, PrintWriter sink) {
        int i = from;
        for (; i < limit && Character.isDigit(text.charAt(i)); i++) {
            sink.print(text.charAt(i));
        }
        return i;
    }

    /**
     *
     * splitAuthors is one of the most complex methods of the deciter class.
     * It takes a string containing many authors and outputs them as individual
     * authors in canonical form. e.g. "Carr L. A. and J. M. Cook" may be
     * output as "&lt;author lastname="Carr" initials="L.A."/&gt;
     * &lt;author lastname="Cook" initials="J.M."/&gt;" .
     * The complexity comes from the need to deal with many different author
     * naming schemes as well as the complexity of names themselves.
     *
     * @param a0 the string containing the original author substring lifted
     *      directly from the input line.
     * @param ds the deciter state in which the author splitting occurs.
     *       this is required because the method has just been made static
     *      so that the XMLOutput object can use it.
     * @param Output the output stream to which the author output is to be
     *      written.
    **/
    protected static void splitAuthors(String a0, DeciterState ds, PrintWriter Output){
        // Splits the author substring a0 into individual names and, for each
        // completed name, writes a surname/initials record to Output in the
        // format selected by ds.doXML or ds.doTXT.
        //
        // Side effects on ds visible in this method:
        //  - ds.notAuthor is set to null up front when ds.noForenameHint is set,
        //    and is assigned the offending token when the scan breaks out early;
        //  - ds.firstAuthor is reset to "" (once a0 has passed the length check)
        //    and then built as "surname, initials" from the first completed name.

        //this string is a list of authors.
        //each one is either in the form "Surname, I. N.,"
        //or "I. N. Surname,"
        //or "Surname, Leslie A., "
        //or "Surname, Leslie Alan, "

        //if Output is null then nothing is written; the scan only updates ds
        //(e.g. to find the string at the end of the authors).
        //this is a temporary hack until the code can be rearranged.
        //it is used particularly in XXX, or other cases where the
        //end of the authors can't be determined by a superficial
        //syntax scan
        boolean doOP=Output!=null;

        if(ds.noForenameHint)ds.notAuthor=null;

        // Inputs shorter than two characters are ignored entirely.
        if(a0.length()<2)return;

        //zerothly, sanitize the list:
        //  "; " -> ", "
        //  " &amp; ", " & ", " and " (any case) -> "; "
	// djb " et al." -> "; et al."
        //  anything from '<' up to and including the next '>' is dropped
        a0=a0.trim();
        Utils.DEBUG("Author splitting->"+a0+"<-");
        String authors="";
        int c=0;
        int smax=a0.length();

        while(c<smax){
            String temp=a0.substring(c);
            if(temp.startsWith("; ")){
                authors+=", ";
                c+=2;
                }
            else if(temp.startsWith(" &amp; ")){
                authors+="; ";
                c+=7;
                }
            else if(temp.startsWith(" & ")){
                authors+="; ";
                c+=3;
                }
	    else if(temp.startsWith(" et al.")){    // djb
		authors+="; et al.";
		c+=7;
		}
            else if(temp.toLowerCase().startsWith(" and ")){
                authors+="; ";
                c+=5;
                }
            else if(temp.charAt(0)=='<'){
                // skip a tag; an unterminated '<' swallows the rest of the string
                c++;
                while(c<smax && a0.charAt(c)!='>') c++;
                c++;
                }
            else{
                authors+=a0.charAt(c);
                c++;
                }
            }
        // The list always ends in a comma (presumably so that the final name
        // is flushed by the comma handling below -- confirm).
        authors+=",";

        Utils.DEBUG("Author2 splitting->"+authors+"<-");
        //first, create a list of tokens (split on whitespace)
        StringTokenizer st = new StringTokenizer(authors);
        boolean newname=true;           // the current token starts a new name
        boolean etAl=false;         // djb
        boolean finishOnSurname=true;
        boolean finishOnComma=false;
        // surs/inits accumulate the current name; surpre holds lower-case
        // components collected before the surname.
        String surs="", inits="", surpre=null;
        // one-token lookahead, filled by the "really a surname" check below
        String nextToken=null;
        int nameNum=1;
        ds.firstAuthor="";
        String s;
        while(nextToken!=null || st.hasMoreTokens()){
            // consume the lookahead token first, if there is one
            if(nextToken==null) s=st.nextToken();
            else{
                s=nextToken;
                nextToken=null;
                }

            // max is the index of the token's last character
            int max=s.length()-1;
            if(max==0)Utils.DEBUG("BAD AUTHOR TOKEN=='"+s+"'");

            boolean initial=Utils.isInitial(s);
            boolean surname=!initial;
            boolean comma=s.charAt(max)==',';
            boolean fullstop=s.charAt(max)=='.';
            boolean reallySurname=false;
            boolean actuallyAnd=false;

            // A trailing ';' (produced by the sanitising pass for "and"/"&")
            // counts as a comma and also forces the end of the current name.
            if(s.charAt(max)==';'){
                comma=true;
                actuallyAnd=true;
                }

            if(newname){
                surs="";
                inits="";
                finishOnSurname=initial;
                finishOnComma=false;
                Utils.DEBUG("New author name");
                if(ds.firstNameFirstHint && !comma && nameNum>1){    
                    /** sometimes authors get it wrong! **/
                    /**if commaed, it must be a surname **/
                    surname=false;
                    finishOnComma=true;
                    finishOnSurname=true;
                    }
                } // if (newname) ...

            else { // djb for first author = [last, first,] or [first I. last,]
            // as well as [last, I.,] and [last, I. J.,]
               Utils.DEBUG("next author token=='"+s+"', initial " + initial
	       +", comma " + comma + ", inits='" + inits + "', surs '"
	       + surs + "'");
               if ( nameNum == 1 ) {
                  if ( initial && !comma ) {      // 10/30/2000
                      surname = false;
                      finishOnComma=true;
                      finishOnSurname=true;
		      if ( surs.indexOf(",") == -1) { // 12/11/00
                         // no comma in what was taken as the surname: fold it into the initials
                         inits+=Utils.toInitials(surs);
                         surs="";
		         }
                      } 
                  else {
                      finishOnComma = true;
                      // surname = false;
                      // 27 Oct 2000 - but handle case of
                      // first author = I. N. Lastname
                      surname = !inits.equals("")&&!initial;
                      }
                  } // nameNum=1 ...
               } // else djb stuff

            // A lower-case name component without a comma is collected in
            // surpre and prepended to the surname when the name is emitted.
            if(Utils.lowerCaseNameComponent(s) && !comma){
                surpre=(surpre==null?s:surpre+" "+s);   // djb 10-30-2000
                continue;
                }

	    // 10-30-2000   "First I. " check
	    // 12-11-2000   But also consider "last, I. J.,"
	    if ( initial && !comma ) {
                // We have "First I. ".  Back up
                // (surs always ends with a space when non-empty, so length()-2
                // is the last character of the most recently appended token)
                if ( !surs.equals("") 
		     && surs.charAt( surs.length()-2 )!=',') {
                    inits=Utils.toInitials(surs); surs="";
		    }
                finishOnSurname = true;
	        } 

            // 'last' is true when this token is taken to end the current name
            boolean last=(nextToken==null && !st.hasMoreTokens()) ||
                     (finishOnComma && comma) ||
                     (finishOnSurname && surname) ||
                     (!finishOnSurname && initial) ||
                     actuallyAnd;

            Utils.DEBUG("Got author token '"+s+"' surname="+surname+" last="+last
	    + ", surs '"+surs+"', inits'"+inits+"'");

            /**
            need to cater for "Carr, Leslie A."
            ie inits is initials and forenames
            **/
            if(newname && !comma)finishOnComma=true;

            // This is REALLY a surname if it's the start of
            // a name, and also the next token is
            // DEFINITELY a set of initials.
            //This caters for "Carr LA," and others
            // djb - or if it is the start of the first name 
            // and not an initial
            // djb - or if it follows a set of initials
            if( st.hasMoreTokens() &&
               ( ( surname && !inits.equals("") )
                 || (surname && newname ))
              ) {
                // peek at the next token; it is consumed on the next iteration
                nextToken=st.nextToken();
                reallySurname=Utils.isInitial(nextToken)
                || ( comma && surname && !inits.equals("") )
                || (nameNum==1 && !initial);  // djb
                // djb [First Last] where "Last" is next token
                if (!comma && !Utils.isInitial(nextToken))
                   reallySurname=false;

                Utils.DEBUG("Really checking surname '"+s
                   +"' against '"+nextToken+"' gives "+reallySurname);
                if (s.equals("et") && nextToken.startsWith("al") ) { // djb
                     Utils.DEBUG("found et al.");
                     etAl = true;
                     }
               } // if st.hasMoreTokens && ...

            //in XXX, two Full Names in a row indicates you have started a journal!
            // Record the offending token (up to its comma) in ds.notAuthor and stop.
            if(ds.noForenameHint && newname && surname && !reallySurname){
                Utils.DEBUG("XXX problem surname at '"+s+"' against '"+nextToken+"' gives "+reallySurname);
                int commapos=s.indexOf(',');
                if(commapos<0)ds.notAuthor=s;
                else ds.notAuthor=s.substring(0,commapos);

                break;
                }

            // Append the token either to the surname or to the initials.
            if((reallySurname || (newname && comma) || (surs.length()==0 && last)) && surname){
                surs+=s+" ";
                }
            else {
                // a token whose second character is lower case contributes
                // only its first letter; otherwise Utils.toInitials decides
                if(max>0 && Character.isLowerCase(s.charAt(1)))
                    inits+=s.charAt(0)+".";
                else inits+=Utils.toInitials(s);
                }

            if(comma && (finishOnComma || last)){
                // DJB - don't do anything with et al.
                if ( etAl ) newname=true; 
		else {
                    String o;
                    int p;
                    char pch;
                    // strip trailing ',', ' ', '.' and ';' from the surname
                    p=surs.length()-1;
		    while(p>0 && ((pch=surs.charAt(p))==',' || pch==' ' || pch=='.' || pch==';'))p--; p++;
                    o=surs.substring(0,p);
                    if(surpre!=null){
                        o=surpre+" "+o;
                        surpre=null;
                        }

                    if(nameNum==1)ds.firstAuthor=o;

                    if(doOP && ds.doXML){
                        Output.print("<author ");
                        Output.print("lastname=\""+Utils.PCDATA(o)+"\" ");
                        }
                    else if(doOP && ds.doTXT){
                        Output.print("\tSurname='"+o+"' ");
                        }

                    // strip trailing ',' and ' ' from the initials
                    p=inits.length()-1;
                    while(p>0 && (inits.charAt(p)==',' || inits.charAt(p)==' '))p--; p++;
                    o=inits.substring(0,p);
                    if(nameNum==1)ds.firstAuthor+=", "+o;
                    // NOTE(review): unlike lastname, the initials and a0 are
                    // written without Utils.PCDATA -- confirm whether they
                    // need escaping in the XML output.
                    if(doOP && ds.doXML){
                        Output.print("initials=\""+o+"\"");
                        if(ds.extended)
                            Output.println(">"+a0+"</author>");
                        else Output.println("/>");
                        }
                    else if(doOP && ds.doTXT){
                        Output.println("Initials='"+o+"'");
                        }
                    newname=true;
                    nameNum++;
                }}
            else{
                newname=false;
                } // if comma && ...

            } // end while...
        } // splitAuthors

    /**
     *
     * detag removes tags from an HTML-style string. These tags are in practice
     * just the font-change tags &lt;b&gt; and &lt;i&gt;. It is used as a 
     * final stage filter after all the sections have been recognised in the
     * original string, and just prior to their final output.
     *
    **/
    protected static String detag(String s){
        // null in, null out
        if (s == null) {
            return null;
        }

        final int n = s.length();
        StringBuffer kept = new StringBuffer(64);
        int from = 0;

        // Copy the text between tags; each '<' ... '>' span is dropped.
        while (from < n) {
            int open = s.indexOf('<', from);
            if (open < 0) {
                // no further tag: keep the remainder
                kept.append(s.substring(from));
                break;
            }
            kept.append(s.substring(from, open));

            int close = s.indexOf('>', open + 1);
            if (close < 0) {
                // unterminated tag: everything after the '<' is discarded
                break;
            }
            from = close + 1;
        }

        return kept.toString();
    }


    }
