package uk.ac.soton.harvester;
import java.util.*;

/**
 *
 * doPlace attempts to recognise the place of publication of a book
 * citation from the misc section.
 *
**/
public class DoPlace implements AttributeMarker {
	/**
	 * Characters that separate tokens when scanning for the place.
	 * The comma is deliberately absent so that it stays attached to the
	 * token it follows; the "preceded by a comma" test relies on that.
	 * NOTE(review): the digit '1' in this set looks like a mistyped '!';
	 * it is left unchanged here -- confirm the intent before altering it.
	 */
	private static final String DELIMITERS=" \t.'\"1/%^&*()-_=+{[}]:;@<>?";

	/**
	 * Tries to mark the place of publication in a book citation.
	 *
	 * The last token of the scanned region is taken as the place. If that
	 * token is two upper-case letters (e.g. "MA") the token before it is
	 * joined on, and if the token before the place is "New" that is joined
	 * on as well. The token preceding the resulting place must end with a
	 * comma, otherwise nothing is marked.
	 *
	 * On success ds.placeb and ds.placee are set to the inclusive start and
	 * end offsets of the place within ds.line. On failure both are set to
	 * -1. If Utils.isBook(ds) is false, ds is not modified at all.
	 *
	 * @param ds the citation state; ds.line, ds.miscb and ds.misce are read,
	 *           ds.placeb and ds.placee are written
	 * @param i  the current offset
	 * @return i, unchanged, in every case
	 */
	public int markAttribute(DeciterState ds, int i){
		if(!Utils.isBook(ds))return i;

		ds.placeb = ds.placee = -1;

		String line=ds.line;
		//nothing to look at on a missing or (near) empty line
		if(line==null || line.length()<2)return i;

		//scan the misc section when it spans more than 10 characters,
		//otherwise fall back to the whole line
		String region=line;
		int regionStart=0;
		if(ds.misce-ds.miscb>10){
			region=line.substring(ds.miscb, ds.misce);
			regionStart=ds.miscb;
			}

		//the last thing should be the place, so record the last four
		//tokens together with the offset (within region) where each one
		//starts; knowing the offsets means the place never has to be
		//searched for again afterwards
		String lt1="", lt2="", lt3="", lt4="";
		int b1=-1, b2=-1, b3=-1, b4=-1;
		int placeEnd=-1; //offset of the last character of the last token
		int pos=0;
		int len=region.length();
		while(pos<len){
			if(isDelimiter(region.charAt(pos))){
				pos++;
				continue;
				}
			int start=pos;
			while(pos<len && !isDelimiter(region.charAt(pos)))pos++;
			lt1=lt2; b1=b2;
			lt2=lt3; b2=b3;
			lt3=lt4; b3=b4;
			lt4=region.substring(start, pos); b4=start;
			placeEnd=pos-1;
			}

		//lt4 accumulates the text of the place; placeStart follows the
		//offset of its first word, placeFW/placeLW its first and last word
		String placeFW=lt4;
		String placeLW=lt4;
		int placeStart=b4;
		Utils.DEBUG("Placetoks='"+lt1+"' '"+lt2+"' '"+lt3+"' '"+lt4+"'");

		//two upper-case letters: pull in the preceding token as well
		if(lt4.length()==2 && Character.isUpperCase(lt4.charAt(0)) && Character.isUpperCase(lt4.charAt(1))){
			placeFW=lt3;
			placeStart=b3;
			lt4=lt3+" "+lt4;
			lt3=lt2; b3=b2;
			lt2=lt1; b2=b1;
			lt1=""; b1=-1;
			}
		Utils.DEBUG("Placetoks='"+lt1+"' '"+lt2+"' '"+lt3+"' '"+lt4+"'");

		//"New <something>": the "New" belongs to the place
		if(lt3.equals("New")){
			placeFW=lt3;
			placeStart=b3;
			lt4=lt3+" "+lt4;
			lt3=lt2; b3=b2;
			lt2=lt1; b2=b1;
			lt1=""; b1=-1;
			}
		Utils.DEBUG("Placetoks='"+lt1+"' '"+lt2+"' '"+lt3+"' '"+lt4+"'");

		//place name should be preceded by a comma?
		//could be a comma by itself "Elsevier & Co.," gets split on
		//the full stop by Adobe.
		int lc=lt3.length()-1;
		if(!(lc>=0 && lt3.charAt(lc)==',')){
			//this didn't work! just return the input offset
			ds.placeb=ds.placee=-1;
			return i;
			}

		Utils.DEBUG("Placetoks='"+lt1+"' '"+lt2+"' '"+lt3+"' '"+lt4+"'");
		Utils.DEBUG("placeFW='"+placeFW+"'");
		Utils.DEBUG("placeLW='"+placeLW+"'");

		//translate the offsets recorded while tokenising back into
		//offsets on the full line; placee is inclusive
		ds.placeb=regionStart+placeStart;
		ds.placee=regionStart+placeEnd;

		//the place should be the last thing on the line
		return i;
		}

	/** Returns true if c is one of the token separating characters. */
	private static boolean isDelimiter(char c){
		return DELIMITERS.indexOf(c)>=0;
		}
	}
