package uk.ac.soton.harvester;
/**
 * DoPublisher attempts to recognise the publisher of a book
 * citation as the string that immediately precedes the place string
 * in the citation.
 */
public class DoPublisher implements AttributeMarker {
	/** Maximum number of leading characters of a word examined when testing for an abbreviation. */
	private static final int ABBREV_PREFIX_LEN=5;

	/**
	 * Marks the publisher span in <code>ds.line</code> as the text that
	 * immediately precedes the already-marked place span.
	 *
	 * On success <code>ds.publishb</code> and <code>ds.publishe</code> are set
	 * to the inclusive start and end indices of the publisher. Both are
	 * left at -1 when no publisher is marked: the citation is not a book
	 * (per <code>Utils.isBook</code>), the line is shorter than two
	 * characters, the place span is unset or inconsistent, or no letter
	 * or digit precedes the place.
	 *
	 * @param ds citation state; reads line, placeb and placee, writes
	 *           publishb and publishe
	 * @param i  current position, returned unchanged when nothing is marked
	 * @return the index just past the publisher, or <code>i</code> when no
	 *         publisher was marked
	 */
	public int markAttribute(DeciterState ds, int i){
		if(!Utils.isBook(ds))return i;

		ds.publishb=ds.publishe=-1;

		String line=ds.line;
		if(line.length()<2)return i;

		if(ds.placeb==-1 || ds.placee==-1 || ds.placee<ds.placeb)
			return i;

		// A place starting at index 0 leaves no room for a publisher, and a
		// start beyond the line is inconsistent; previously both cases threw
		// StringIndexOutOfBoundsException.
		if(ds.placeb<1 || ds.placeb>line.length())
			return i;

		// Step back over punctuation and whitespace separating the
		// publisher from the place.
		int end=ds.placeb-1;
		while(end>0 && !Character.isLetterOrDigit(line.charAt(end)))end--;

		// Only punctuation/whitespace precedes the place: no publisher.
		if(!Character.isLetterOrDigit(line.charAt(end)))return i;

		Utils.DEBUG("ENDING PUBL AT "+end);
		Utils.DEBUG("ENDING PUBL AT "+end+" ("+line.substring(end)+")");

		// Find the beginning of the publisher by looking for significant
		// punctuation: the end of a previous tag ('>', e.g. the close
		// italic of a title) or a full stop that is not part of a
		// recognised abbreviation.
		int start=end;
		while(start>0){
			char prev=line.charAt(start-1);
			if(prev=='>')break;
			if(prev=='.' && !endsAbbreviation(line, start-1))break;
			start--;
		}

		// Trim leading whitespace; end is a letter or digit, so this stops
		// at end at the latest.
		while(start<end && Character.isWhitespace(line.charAt(start)))start++;

		ds.publishb=start;
		ds.publishe=end;
		Utils.DEBUG("STARTING PUBL AT "+start+" ("+line.substring(start,end+1)+")");

		return end+1;
	}

	/**
	 * Tells whether the full stop at index <code>dot</code> terminates an
	 * abbreviation commonly found inside a press name ("Uni.", "Univ.",
	 * "Co.") rather than ending a phrase.
	 *
	 * @param line the citation text
	 * @param dot  index of the full stop in <code>line</code>
	 * @return true when the word before the full stop is a recognised
	 *         abbreviation
	 */
	private static boolean endsAbbreviation(String line, int dot){
		// Walk back to the first character of the whitespace-delimited
		// word that ends at the full stop (index 0 if none precedes it).
		int wordStart=dot;
		while(wordStart>0 && !Character.isWhitespace(line.charAt(wordStart-1)))wordStart--;

		// Only a short prefix of the word, excluding the full stop itself,
		// is needed for the comparison.
		String word=line.substring(wordStart, Math.min(wordStart+ABBREV_PREFIX_LEN, dot));

		// "Co" is matched exactly because the terminating full stop is not
		// part of word; the "Co." prefix is kept for words with an
		// internal full stop.
		return word.startsWith("Uni") || word.equals("Co") || word.startsWith("Co.");
	}
}

