package uk.ac.soton.harvester;
import java.util.*;

/**
 * EntityEncoder is a convenience class that allows
 * the deciter class to directly code entity strings
 * without using an EntityWriter.
 * Its purpose is to change non-ASCII characters in
 * strings to their ISO-Latin-1 entity name equivalents.
 */
public class EntityEncoder {

	/**
	 * d provides a reverse lookup from a character to its entity
	 * name. It is queried with a one-character string as the key
	 * and the result is used as an entity name without the
	 * leading ampersand or trailing semicolon.
	 */
	Dictionary d=new ISOLatRevHashTable();

	/**
	 * entName is a wrapper function which guarantees a non-null
	 * name for a character. It defaults to "unknown" when the
	 * lookup table has no entry for the character.
	 * @param ch character value to look up
	 * @return entity name of the character parameter as held in
	 * the lookup table (or "unknown" in pathological cases).
	 */
	String entName(char ch){
		String name=(String)d.get(String.valueOf(ch));
		return (name!=null) ? name : "unknown";
	}

	/**
	 * PCDATA is just an alias for <tt>encode</tt>.
	 * @param s the string to process (may be null)
	 * @return the encoded string, exactly as returned by
	 * <tt>encode</tt>
	 */
	String PCDATA(String s){
		return encode(s);
	}

	/**
	 * encode encodes any unusual characters in a string as
	 * entities. Printable ASCII characters, and the whitespace
	 * characters tab, line feed and carriage return, are left
	 * untouched. Some "ordinary" characters ('&amp;','&lt;','&gt;')
	 * have to be usurped to conform to the XML standard.
	 * Characters that have a name in the lookup table are written
	 * as named entities; any other character is written as a
	 * decimal numeric character reference so that no information
	 * is lost and no undeclared entity name is emitted.
	 * <i>e.g.</i> "Carr &amp; Ren&eacute;" is transformed into
	 * "Carr &amp;amp; Ren&amp;eacute;" .
	 * @param s the string to process (may be null)
	 * @return the string with embedded characters replaced by
	 * entity references, or null if <tt>s</tt> is null
	 */
	String encode(String s){
		if(s==null){
			return null;
		}

		final int len=s.length();
		StringBuffer out=new StringBuffer(len+16);

		for(int i=0; i<len; i++){
			char ch=s.charAt(i);
			if(ch=='&'){
				out.append("&amp;");
			}
			else if(ch=='<'){
				out.append("&lt;");
			}
			else if(ch=='>'){
				out.append("&gt;");
			}
			else if((ch>=32 && ch<127) || ch=='\t' || ch=='\n' || ch=='\r'){
				// Printable ASCII and XML whitespace need no escaping.
				out.append(ch);
			}
			else{
				String name=(String)d.get(String.valueOf(ch));
				if(name!=null){
					out.append('&').append(name).append(';');
				}
				else{
					// No named entity: fall back to a numeric reference
					// rather than emitting an undeclared "&unknown;".
					int code=ch;
					if(ch>=0xD800 && ch<=0xDBFF && i+1<len){
						char low=s.charAt(i+1);
						if(low>=0xDC00 && low<=0xDFFF){
							// Combine a surrogate pair into one code point
							// and skip the low half.
							code=0x10000+((ch-0xD800)<<10)+(low-0xDC00);
							i++;
						}
					}
					// NOTE(review): control characters other than tab, LF
					// and CR are not legal in XML 1.0 even as references;
					// confirm whether callers can ever supply them.
					out.append("&#").append(code).append(';');
				}
			}
		}
		return out.toString();
	}

}
