package uk.ac.soton.harvester;
import java.io.*;
import java.util.*;

/**
 * EntityReader extends the behaviour of <tt>BufferedReader</tt>
 * so that any ISO-Latin-1 entities are replaced by their
 * ASCII/Unicode characters.
 * This class accompanies <tt>EntityWriter</tt> to allow
 * the processor to read data in from and write data out
 * to XML-based files.
 */
public class EntityReader extends BufferedReader {

	/**
	 * Replacement text returned for any entity reference that
	 * cannot be resolved to a character.
	 */
	private static final String UNKNOWN = "_";

	/**
	 * Exclusive upper bound on the numeric character references
	 * that are decoded; larger values yield {@link #UNKNOWN}.
	 * NOTE(review): the value 258 is preserved from the original code.
	 * ISO Latin-1 ends at 255, so 256 may have been intended - confirm
	 * against <tt>EntityWriter</tt> before changing.
	 */
	private static final int CHAR_REF_LIMIT = 258;

	/**
	 * d provides a lookup from entity name (without the
	 * surrounding '&amp;' and ';') to a replacement string.
	 * A fresh table is created for every reader instance.
	 */
	Dictionary d = new ISOLatHashTable();

	/**
	 * The main constructor allows an <tt>EntityReader</tt>
	 * to be based on any kind of <tt>Reader</tt>.
	 * @param in the underlying reader supplying the raw text
	 */
	EntityReader(Reader in) {
		super(in);
	}

	/**
	 * entLookup is a wrapper function which guarantees a replacement
	 * string for an entity name. It defaults to "_" for unrecognised
	 * entities and never throws.
	 * @param name entity name to be looked up, without the leading
	 *        '&amp;' and trailing ';'. name may in fact be a character
	 *        reference of the form <tt>#n</tt> (decimal) or
	 *        <tt>#xh</tt> (hexadecimal) according to the rules of XML.
	 * @return the replacement found in the lookup table for a named
	 *        entity; a String of length 1 for a numeric reference in
	 *        the accepted range; or "_" in pathological cases (null or
	 *        empty name, unknown name, malformed or out-of-range number).
	 */
	String entLookup(String name) {
		// Guard first: charAt(0) on an empty name (from "&;") used to throw.
		if (name == null || name.length() == 0) {
			return UNKNOWN;
		}

		if (name.charAt(0) == '#') {
			return charRefLookup(name.substring(1));
		}

		String res = (String) d.get(name);
		return (res == null) ? UNKNOWN : res;
	}

	/**
	 * Decodes the body of a numeric character reference.
	 * @param digits the text following '#': decimal digits, or 'x'
	 *        followed by hexadecimal digits
	 * @return a one-character String, or "_" if the text is not a
	 *        number or lies outside the accepted range
	 */
	private static String charRefLookup(String digits) {
		try {
			int code;
			if (digits.length() > 1 && digits.charAt(0) == 'x') {
				// XML hexadecimal form: &#xHH;
				code = Integer.parseInt(digits.substring(1), 16);
			} else {
				code = Integer.parseInt(digits);
			}
			if (code > 0 && code < CHAR_REF_LIMIT) {
				return String.valueOf((char) code);
			}
			return UNKNOWN;
		} catch (NumberFormatException e) {
			// Not a number at all (e.g. "#", "#abc"): treat as unrecognised.
			return UNKNOWN;
		}
	}

	/**
	 * entString decodes the entity references embedded in a string.
	 * Ordinary characters are left untouched. Each span of the form
	 * <tt>&amp;name;</tt> is replaced by the result of
	 * {@link #entLookup(String)}, so unrecognised names become "_".
	 * <i>e.g.</i>
	 * "Carr &amp;amp; Ren&amp;eacute;" is transformed into
	 * "Carr &amp; Ren&eacute;" (assuming the lookup table maps
	 * <tt>amp</tt> and <tt>eacute</tt> in the usual way).
	 * <p>
	 * An '&amp;' that does not start a plausible reference is copied
	 * through unchanged: that is, when no ';' follows it, or when the
	 * text up to the next ';' is empty or contains whitespace,
	 * '&amp;' or '&lt;'. This stops a stray ampersand from swallowing
	 * the text up to some unrelated semicolon later in the line.
	 * @param s the string to process; may be null
	 * @return the string with embedded entity references replaced,
	 *         or null if s is null
	 */
	String entString(String s) {
		if (s == null) {
			return null;
		}

		StringBuffer out = new StringBuffer(s.length());
		int pos = 0;

		while (pos < s.length()) {
			char ch = s.charAt(pos);
			int semi = (ch == '&') ? s.indexOf(';', pos) : -1;

			if (semi >= 0 && isEntityName(s, pos + 1, semi)) {
				out.append(entLookup(s.substring(pos + 1, semi)));
				pos = semi + 1; // resume after the terminating ';'
			} else {
				out.append(ch);
				pos++;
			}
		}
		return out.toString();
	}

	/**
	 * Tests whether s[from, to) could be the name part of an entity
	 * reference: non-empty and free of whitespace, '&amp;' and '&lt;'.
	 * The test is deliberately loose so that any key the lookup table
	 * might hold is still passed on to entLookup.
	 */
	private static boolean isEntityName(String s, int from, int to) {
		if (from >= to) {
			return false;
		}
		for (int i = from; i < to; i++) {
			char ch = s.charAt(i);
			if (ch == '&' || ch == '<' || Character.isWhitespace(ch)) {
				return false;
			}
		}
		return true;
	}

	/**
	 * readLine returns an entity-decoded version of the line
	 * read from the underlying <tt>Reader</tt> object (presumably an
	 * XML stream). Decoding is done one line at a time, so a reference
	 * split across a line break is not decoded. Only readLine decodes;
	 * the other inherited read methods return the raw text.
	 * @return the decoded line, or null at end of stream
	 * @throws IOException if the underlying reader fails
	 */
	public String readLine() throws IOException {
		return entString(super.readLine());
	}

}
