package uk.ac.soton.harvester;
/**
 *
 * doDate recognises the occurrence of the year of publication.
 * It is expected to either be just after the authors and before the
 * article or journal name, or near (at) the end of the citation.
 * It may be in parentheses or followed by a full stop. It may also
 * have a lowercase letter appended to it to distinguish between 
 * similarly authored citations.
 *
 * djb: after finding the year, back up to see if there is a month
 * in front of it, and include that, too.  Or a month abbrev and days
 *
 * djb: sometimes the year contains a - as in 1-21, which actually
 * came from a page range.  Fixes marked djb2.
**/
import java.text.*;

public class DoDate implements AttributeMarker {
	public int markAttribute(DeciterState ds, int i){
		String line=ds.line;
		int maxi=line.length()-1;
		char ch;

		if(maxi-i<4)return i;

		int off=i;
		boolean found=false;

		/* djb while(maxi-off>=4){ */
		while(maxi-off>=3){ 
		   ch=line.charAt(off);
		   if ( ch == '"' )
		   while ( (ch = line.charAt(++off)) != '"');
		   // skipped over quoted title.  ch at position off is "
		   if(ch!='1' && ch!='2'){
			off++;
			continue;
			}
		   if(ch=='2' && line.charAt(off+1)=='0' &&
		      Character.isDigit(line.charAt(off+2)) &&
		      Character.isDigit(line.charAt(off+3))){
			found=true;
		        Utils.DEBUG("Match date at '"+line.substring(off,off+4)+"...'");
			break;
			}
		   /* djb2 - this seems to be where it happens.  
		      I think we are really looking for 18.. and 19..
		   else if(ch=='1' &&
		      (line.charAt(off+1)!='9' || line.charAt(off+1)!='8') &&
		      Character.isDigit(line.charAt(off+2)) &&
		      Character.isDigit(line.charAt(off+3))){
		   */
		   else if(ch=='1' &&
		      (line.charAt(off+1)=='9' || line.charAt(off+1)=='8') &&
		      Character.isDigit(line.charAt(off+2)) &&
		      Character.isDigit(line.charAt(off+3))){
			found=true;
		        Utils.DEBUG("Match date at '"+line.substring(off,off+4)+"...'");
			break;
			}
		   else Utils.DEBUG("Failed to match date at '"+line.substring(off,off+4)+"...'");
		   off++;
		   }

		ds.endofdate=i;
		if(found){
			ds.dateb=off;
			ds.datee=off+3;
			Utils.DEBUG("D{"+line.substring(ds.dateb, ds.datee+1)+"}@["+ds.dateb+"]");
			i=off+4;
			//1996a
			if(i<maxi && Character.isLowerCase(line.charAt(i)))i++;
			//(1996)
			if(i<maxi && line.charAt(i)==')')i++;
			if(i<maxi && line.charAt(i)=='.')i++;
			if(i<maxi && line.charAt(i)==',')i++;
			if(i<maxi && line.charAt(i)==':')i++;
			ds.endofdate=i;
			// DJB: see if there is a month before the year
			String[] sdfs = {"MMM yyy", "M d-d, yyy"};
			off=ds.dateb-1;
			while ( Character.isWhitespace(line.charAt(off))) off--;
			if ( Character.isLowerCase(line.charAt(off))) {
			   while ( Character.isLetter(line.charAt(off))  ) off--;
                           SimpleDateFormat sdf = new SimpleDateFormat("MMM yyyy");
			   try {
			      sdf.parse(line.substring(off+1, ds.datee+1));
		              Utils.DEBUG("Match date at '"+line.substring(off+1,ds.datee+1)+"'");
			      ds.dateb=off+1;
			   } catch (Exception e){}
			} // if lower case
			// end of djb mods
			return i;
			}
		else { ds.endofdate=-1; return i; } // djb
		}
	}


