package uk.ac.soton.harvester;
/**
 *
 * doTitle recognises the extent of the article title.
 * This method is one of the least exact of the class and requires
 * subsequent sanity checks. It basically takes the form of
 * three special cases: quoted title, italicised title or
 * a complete sentence.
 * XXX is another (unusual) special case (!) in which the
 * journal title is treated as an article title (ie recognised here)
 * and then transferred before the output stage.
 *
**/
public class DoTitle implements AttributeMarker {
	public DoTitle(){}

	/**
	 * Marks the extent of the article title in {@code ds.line}.
	 *
	 * On return {@code ds.titb} and {@code ds.tite} hold the first and last
	 * (inclusive) indices of the title, or {@code ds.titb} is -1 when no
	 * title could be marked and no author span was available to reuse.
	 * Four strategies are tried in order: the XXX special case (a run of
	 * capitalised words), a double-quoted title, an apostrophe-quoted
	 * title, a tagged title ({@code <x>...</x>}) and finally a plain
	 * sentence terminated by '.' or '?'.
	 *
	 * @param ds parse state; {@code line}, {@code dateb}, {@code authb},
	 *           {@code authe}, {@code endofdate} and {@code xxxHint} are
	 *           read, {@code titb}/{@code tite} (and possibly
	 *           {@code authb}/{@code authe}) are written
	 * @param i  index in {@code ds.line} at which to start looking
	 * @return the index at which the caller should resume scanning
	 */
	public int markAttribute(DeciterState ds, int i){
		String line=ds.line;
		int maxi=line.length()-1; // index of the last character
		char ch=' ';

		// (djb) if there is no date, it is always too close
		// (djb) add check for not being -1
		// (djb) add check that date is not earlier than authe
		// A date starting fewer than 6 characters after the authors leaves
		// no room for a title in between, so start after the date instead.
		if(ds.dateb > -1 && ds.dateb>ds.authe && ds.dateb-ds.authe<6){
			i=ds.endofdate;
			Utils.DEBUG("Date too close to authors"+
			"(ds.dateb="+ds.dateb+",ds.authe="+ds.authe);
		}
		else {
			Utils.DEBUG("Date too near to end");
		}
		if(i>=maxi)return i;

		if ( i <0 ) i=0;  // djb - to kill index out of bounds error
		Utils.DEBUG("Title starts at "+i);//+" ("+line.substring(i)+")");

		// Skip leading whitespace; ch ends up holding the last character
		// examined (the first non-whitespace one, unless i reached maxi).
		while(i<maxi && Character.isWhitespace(ch=line.charAt(i)))i++;

		//well, either it's quoted, or italicised or a sentence.
		//unless its XXX in which case its everything!
		if(ds.xxxHint){
			int j=i;
			//title should at least start with an uppercase character
			while(j<maxi && !Character.isUpperCase(line.charAt(j)))
				j++;
			if(j==maxi)return i;
			ds.titb=ds.tite=j;

			// Consume consecutive words that start with an uppercase letter.
			// The j<maxi guard stops the loop once the last character is
			// reached: previously an uppercase final character made
			// charAt(j+1) read past the end of the line.
			while(j<maxi && Character.isUpperCase(line.charAt(j))){
				if(Character.isDigit(line.charAt(j+1))){
					//found B433 as in Nucl Phys B433
					ds.tite=j;
					break;
				}
				while(j<maxi && !Character.isWhitespace(line.charAt(j)))j++;
				ds.tite=j-1;
				while(j<maxi && Character.isWhitespace(line.charAt(j)))j++;
			}
			i=j;
		}
		else if(maxi-1<4){
			//no point in trying to find a title
			// NOTE(review): this tests maxi-1 (overall line length), not the
			// remaining length maxi-i -- possibly a typo; confirm intent.
		}
		else if(ch=='"'){
			Utils.DEBUG("E");
			int j=line.indexOf('"',i+1);
			// No closing quote: fall back on ds.titb as it was on entry
			// (it is only reassigned on the next line).
			if(j<0)j=ds.titb+1; //spit.
			ds.titb=i+1; ds.tite=j-1;
			i=j+1;
		}
		else if(ch=='\''){
			Utils.DEBUG("F");
			int j=line.indexOf('\'',i+2);
			//that's i+2 up there to get around the use of
			//two apostrophes as one double quote
			if(j<0)j=ds.titb+1; //spit.
			// An apostrophe directly followed by a letter is taken to be
			// inside a word, so look for the next one. The j<maxi guard
			// keeps charAt(j+1) in range when the apostrophe is the last
			// character of the line.
			else if(j<maxi && Character.isLetter(line.charAt(j+1))){
				j=line.indexOf('\'',j+1);
				//ought to do this iteratively to avoid
				//apostrophes
				// NOTE(review): if no later apostrophe exists j becomes -1,
				// giving tite=-2 and i=0 below -- confirm this is intended.
			}
			ds.titb=i+1; ds.tite=j-1;
			i=j+1;
		}
		else if(ch=='<' &&
			Character.isLetter(line.charAt(i+1)) &&
			line.indexOf('<',2)>10 //ignore italicised sort words at the front of a title
			){
			Utils.DEBUG("G");
			char tc=line.charAt(i+1); // first letter of the tag name
			int j=line.indexOf("</"+tc, i+1); // start of the matching close tag
			if(j<0)j=ds.titb+1; //spit.
			// Title begins just after the '>' that ends the opening tag.
			ds.titb=i;
			while(ds.titb<maxi && line.charAt(ds.titb)!='>')ds.titb++;
			ds.titb++;
			ds.tite=j-1;
			// Resume just after the '>' that ends the closing tag.
			i=j;
			while(i<maxi && line.charAt(i)!='>')i++;
			i++;
		}
		else { //sentence city
			Utils.DEBUG("H");
			ds.titb=i;
			int j=line.indexOf('.',i+1);
			Utils.DEBUG("0Err, (i,j)=("+i+","+j+")");
			if(maxi-j<4)j=-1; //don't use the end-of-cite punct.
			Utils.DEBUG("1Err, (i,j)=("+i+","+j+")");
			if(j<0)j=line.indexOf('?',i+1);
			Utils.DEBUG("2Err, (i,j)=("+i+","+j+")");
			// Clamp the fallback to the last valid index: when the title
			// starts on the final character, titb+1 equals line.length() and
			// the substring(titb,tite+1) call further down used to throw.
			if(j<0)j=Math.min(ds.titb+1,maxi); //spit.
			Utils.DEBUG("3Err, (i,j)=("+i+","+j+")");
			ds.tite=j;
			Utils.DEBUG("4Err, (i,j)=("+i+","+j+")");
			Utils.DEBUG("'"+line.substring(i,j)+"'");
			i=j+1;
		}

		if(ds.titb>=0&&ds.tite>=ds.titb){
			Utils.DEBUG("Title='"+line.substring(ds.titb,ds.tite+1)+"'");
		}
		else {
			Utils.DEBUG("Title=null");
			// djb - see if there are authors.  It is rare to have
			// authors and no title
			if ( ds.authe > 0 ) { // swap
				// Reinterpret the author span as the title and clear the authors.
				ds.titb = ds.authb;
				ds.tite = ds.authe;
				ds.authb=ds.authe=-1;
				i = ds.tite+1;
				Utils.DEBUG("Title retrieved from authors ='"
				+line.substring(ds.titb,ds.tite+1)+"'");
			} else {
				// djb - prevent index out of bounds in prepare:
				ds.titb = -1;
			}
		}

		return i;
	}
}


