package uk.ac.soton.harvester;
import java.io.*;

/**
 * The DeciterState class defines an object that holds all the state of the
 * deciter, especially the hints and the marker offsets.
 *
 **/
public class DeciterState {

	/**
	 * Creates the state for one document and applies the command-line style
	 * options to it.
	 *
	 * Recognised options are:
	 * <tt>-v</tt> (calls <tt>Utils.setDebugging(true)</tt>),
	 * <tt>-E</tt> (extended),
	 * <tt>-F</tt> (firstNameFirstHint),
	 * <tt>-M</tt> (multiCiteSharesAuthorHint),
	 * <tt>-Md</tt> (multiCiteMDashHint),
	 * <tt>-xxx</tt> (xxxHint and noForenameHint),
	 * <tt>-h</tt>, <tt>-x</tt> and <tt>-t</tt> (HTML, XML or text output;
	 * the last one given wins).
	 * Unrecognised options, and null entries in the array, are ignored.
	 *
	 * @param id   the document id, stored in <tt>documentid</tt>
	 * @param opts the option strings; may be null, in which case every
	 *             setting keeps its default
	 **/
	DeciterState(String id, String opts[]){
		// NOTE(review): setNewCitation is public and overridable, so a
		// subclass override would run before the subclass is initialised.
		setNewCitation(null);
		nCites=0;

		documentid=id;

		if(opts==null){
			return;
		}
		for(int optn=0; optn<opts.length; optn++){
			String o=opts[optn];
			// The constant is the receiver of equals() so that a null entry
			// simply matches nothing instead of throwing.
			if("-v".equals(o)){
				Utils.setDebugging(true);
			}
			else if("-E".equals(o)){
				extended=true;
			}
			else if("-F".equals(o)){
				firstNameFirstHint=true;
			}
			else if("-M".equals(o)){
				multiCiteSharesAuthorHint=true;
			}
			else if("-Md".equals(o)){
				multiCiteMDashHint=true;
			}
			else if("-xxx".equals(o)){
				xxxHint=true;
				noForenameHint=true;
			}
			else if("-h".equals(o)){
				selectOutputFormat(true, false, false);
			}
			else if("-x".equals(o)){
				selectOutputFormat(false, true, false);
			}
			else if("-t".equals(o)){
				selectOutputFormat(false, false, true);
			}
		}
	}

	/**
	 * Sets the three output-format flags in one go.
	 *
	 * @param html new value of <tt>doHTML</tt>
	 * @param xml  new value of <tt>doXML</tt>
	 * @param txt  new value of <tt>doTXT</tt>
	 **/
	private void selectOutputFormat(boolean html, boolean xml, boolean txt){
		doHTML=html;
		doXML=xml;
		doTXT=txt;
	}

	/**
	 * Prepares the state for a new citation: every marker pair (and
	 * <tt>endofdate</tt>) is reset to -1 ("not set"), and the given line
	 * becomes the current line.
	 *
	 * The hint flags, output-format flags, <tt>nCites</tt>,
	 * <tt>notAuthor</tt> and <tt>firstAuthor</tt> are not touched.
	 *
	 * @param line the citation text, or null for "no current citation";
	 *             <tt>maxi</tt> becomes <tt>line.length()-1</tt>, or -1
	 *             when line is null
	 **/
	public void setNewCitation(String line){
		digb=-1; dige=-1;
		authb=-1; authe=-1;
		dateb=-1; datee=-1; endofdate=-1;
		titb=-1; tite=-1;
		pagb=-1; page=-1;
		pubb=-1; pube=-1;
		volb=-1; vole=-1;
		issb=-1; isse=-1;
		miscb=-1; misce=-1;
		xxxb=-1; xxxe=-1;
		publishb=-1; publishe=-1;
		placeb=-1; placee=-1;
		urlb=-1; urle=-1;                   // djb

		this.line=line;
		// Without a line there is no valid offset; do not leave the previous
		// citation's value behind.
		maxi=(line==null) ? -1 : line.length()-1;
	}

	/**
	 *
	 * nCites holds the number of citations processed for the current article.
	 * It is only used to return to the calling environment for information
	 * (perhaps for debugging or validation purposes).
	 *
	**/
	protected int nCites=0;

	/**
	 *
	 * extended is a debugging relic which controls whether the original
	 * author string is emitted along with the rest of the XML output for
	 * immediacy of comparison. Set by the <tt>-E</tt> option.
	 *
	**/
	public boolean extended=false;

	// The following set of booleans is all to do with the various hints.

	/**
	 *
	 * xxxHint states that the article is from the XXX archive, i.e. is
	 * a physics preprint publication. Set by the <tt>-xxx</tt> option.
	 *
	**/
	public boolean xxxHint=false;

	/**
	 *
	 * noForenameHint declares that it is unlikely that a forename
	 * will be given with the surname. Explicitly set when xxxHint
	 * is set.
	 *
	**/
	public boolean noForenameHint=false;

	/**
	 *
	 * firstNameFirstHint declares that the citation style tends to put
	 * the first name before the surname, at least after the initial author
	 * has been dealt with (surnames always come first for first authors
	 * so that you can see the primary sort key).
	 * Set by the <tt>-F</tt> option.
	 *
	**/
	public boolean firstNameFirstHint=false;

	/**
	 *
	 * multiCiteSharesAuthorHint declares that the citations of a
	 * single author may be grouped together as a single entry. Each
	 * 'subentry' is recognised by a new year starting. This hint
	 * should be replaced with a more generic recognition scheme.
	 * Set by the <tt>-M</tt> option.
	 *
	**/
	public boolean multiCiteSharesAuthorHint=false;

	/**
	 *
	 * multiCiteMDashHint declares that the citations of a
	 * single author may appear to be grouped together as a single entry.
	 * Each 'subentry' in fact starts with 3 emdashes as a ditto mark.
	 * This hint should be replaced with a more generic recognition scheme.
	 * Set by the <tt>-Md</tt> option.
	 *
	**/
	public boolean multiCiteMDashHint=false;

	/**
	 *
	 * MDashCiteSep is the 3-emdash string which is used to separate
	 * some forms of citation (see multiCiteMDashHint).
	 *
	**/
	final String MDashCiteSep="\u2014\u2014\u2014";


	/**
	 *
	 * hint_Author1 declares that a very simple scheme for recognising the
	 * extent of an author sequence is in force. Author sequences extend
	 * up to the first full stop. It is a constant and is always false here.
	 *
	**/
	public final boolean hint_Author1=false;

	//
	// End of hinting booleans
	//


	// The following batch of variables control the extents of various
	// significant ranges within a citation. Each range is stored as a
	// pair of variables, one which marks the beginning and another the end.
	// The value -1 is used as a marker value to indicate "not set".
	// All of them are reset by setNewCitation().

	/**
	 *
	 * digb and dige store the beginning and end offsets of the
	 * initial numbering string of the citation.
	 * (e.g. the '34' of "[34]" or "34.")
	 *
	**/
	public int digb=-1, dige=-1;

	/**
	 *
	 * authb and authe store the beginning and end offsets of the
	 * authors sequence of the citation.
	 *
	**/
	public int authb=-1, authe=-1;

	/**
	 *
	 * dateb and datee store the beginning and end offsets of the
	 * year substring of the citation.
	 * (e.g. the '1997' of "(1997)" or "1997b.")
	 *
	**/
	public int dateb=-1, datee=-1;

	/**
	 *
	 * endofdate stores the offset of the first significant character
	 * (presumably the first one after the point where the year substring
	 * was matched -- confirm against the DoDate recogniser). It is actually
	 * the returned value from the DoDate recogniser, subsequently used for
	 * the DoAuthors.
	 *
	**/
	public int endofdate=-1;

	/**
	 *
	 * titb and tite store the beginning and end offsets of the
	 * title substring of the citation.
	 *
	**/
	public int titb=-1, tite=-1;

	/**
	 *
	 * pagb and page store the beginning and end offsets of the
	 * page range substring of the citation.
	 * (e.g. the '19--27')
	 *
	**/
	public int pagb=-1, page=-1;

	/**
	 *
	 * pubb and pube store the beginning and end offsets of the
	 * publication (i.e. journal) substring of the citation.
	 * (e.g. the 'CACM' or "Journal of New Politics")
	 *
	**/
	public int pubb=-1, pube=-1;

	/**
	 *
	 * volb and vole store the beginning and end offsets of the
	 * volume substring of the citation.
	 *
	**/
	public int volb=-1, vole=-1;

	/**
	 *
	 * issb and isse store the beginning and end offsets of what is
	 * presumably the issue substring of the citation.
	 * NOTE(review): the earlier comment said "year substring", which looks
	 * like a copy-paste slip from dateb/datee -- confirm against the users
	 * of these fields.
	 *
	**/
	public int issb=-1, isse=-1;

	/**
	 *
	 * miscb and misce store the beginning and end offsets of the
	 * miscellaneous (unused and unrecognised) substring of the citation.
	 * This may be a substantial region for a book citation, or
	 * may hoover up whole citations if the Adobe hyphenated column bug is
	 * in operation or if unrecognised multicites have occurred.
	 *
	**/
	public int miscb=-1, misce=-1;

	/**
	 *
	 * publishb and publishe store the beginning and end offsets of the
	 * publisher name if this citation corresponds to a book.
	 *
	**/
	public int publishb=-1, publishe=-1;

	/**
	 *
	 * placeb and placee store the beginning and end offsets of the
	 * place name if this citation corresponds to a book.
	 *
	**/
	public int placeb=-1, placee=-1;

	/**
	 *
	 * xxxb and xxxe store the beginning and end offsets of the
	 * XXX id string of the citation.
	 * (e.g. 'hep-th/9907001'). This is only used if xxxHint is in operation.
	 *
	**/
	public int xxxb=-1, xxxe=-1;


	/**
	 *
	 * urlb and urle store the beginning and end offsets of the
	 * http address if one is present in the citation.
	 *               - djb
	 *
	**/
	public int urlb=-1, urle=-1;

	// This is the end of the batch of marker variables.

	/**
	 * line contains the whole citation input line from which the
	 * fields are eventually teased. It is null until setNewCitation()
	 * has been given a line.
	 **/
	public String line;

	/**
	 *
	 * maxi is the maximum valid offset that can be used with the
	 * charAt() method of the string which is the current line.
	 * It corresponds to <tt>length()-1</tt>, and is -1 when there is no
	 * current line (or the line is empty).
	 *
	**/
	public int maxi;

	/**
	 *
	 * documentid holds the id which is passed to the harvester from
	 * "The System".
	 *
	**/
	public String documentid;

	/**
	 *
	 * notAuthor is the first potential author-string token which
	 * seems to not be an author name.
	 * This is used internally by the splitAuthor() and doAuthor() methods.
	 *
	**/
	public String notAuthor;

	/**
	 *
	 * firstAuthor stores the first named author from the splitAuthor()
	 * method for subsequent use in a multiCite situation.
	 *
	**/
	public String firstAuthor;

	/**
	 * doTXT is one of a group of booleans that control the format of
	 * deciter's output: text, HTML or XML. Selected by the <tt>-t</tt>
	 * option.
	 */
	public boolean doTXT=false;

	/**
	 * doHTML is one of a group of booleans that control the format of
	 * deciter's output: text, HTML or XML. Selected by the <tt>-h</tt>
	 * option.
	 */
	public boolean doHTML=false;

	/**
	 * doXML is one of a group of booleans that control the format of
	 * deciter's output: text, HTML or XML. This is the default format;
	 * it can also be selected explicitly with the <tt>-x</tt> option.
	 */
	public boolean doXML=true;
}
