import java.net.*;
import java.io.*;

/**
 * An instance is a webpage, given by a url.
 *
 * The url string is parsed once, in the constructor. If it cannot be parsed,
 * the instance is still created but is "bad": getUrl() returns null, isBad()
 * returns true, isHtmlPage() returns false and getLinks() returns null.
 */
public class Webpage {
    private final String u;         // The url for this Webpage (as a String)
    private final URL    url;       // The url for this Webpage (as a URL)
                                    // --null if there is a problem with the URL
    private final String protocol;  // the protocol of this url (null if none)
    private final String host;      // the host of this url (null if none)
    private final String path;      // the path of this URL (null if none)

    private Set links= null;  // The set of links on this page. (null if not
                              // yet constructed). ONLY Strings SHOULD BE STORED
                              // IN THIS SET

    /**
     * Constructor: a web page for URL u.
     * If u is not a well-formed URL, the problem is reported on System.out
     * and the url, protocol, host and path of this Webpage are all null.
     *
     * @param u the url of the webpage, as a String
     */
    public Webpage(String u) {
        this.u= u;
        URL parsed= null;
        try {
            parsed= new URL(u);
        } catch (MalformedURLException e) {
            System.out.println("exception: " + e);
        }
        url= parsed;
        protocol= (parsed == null) ? null : parsed.getProtocol();
        host= (parsed == null) ? null : parsed.getHost();
        path= (parsed == null) ? null : parsed.getFile();
    }

    /**
     * @return the URL for this webpage (as a String), exactly as it was
     *         given to the constructor
     */
    public String getStringUrl() {
        return u;
    }

    /**
     * @return the URL for this webpage (as a URL), or null if the string
     *         given to the constructor could not be parsed
     */
    public URL getUrl() {
        return url;
    }

    /**
     * = "This is an html page: url is not null,
     *    it has protocol http or file,
     *    and it has suffix .html or .htm"
     * The protocol and suffix comparisons ignore case.
     *
     * @return true iff all of the conditions above hold
     */
    public boolean isHtmlPage() {
        // The null test must come first: protocol and path are null for a bad URL.
        if (url == null) {
            return false;
        }
        return hasReadableProtocol()
            && (endsWithIgnoreCase(path, ".htm")
                || endsWithIgnoreCase(path, ".html"));
    }

    /**
     * = the links on this webpage, each as a String (= null if the
     *   webpage cannot be read).  Call this only if the protocol is http or
     *   file and the URL ends in .htm or .html. We define the set of links as
     *   follows. If a tag of the webpage has an argument of the form
     *       href="xxx"
     *   or
     *       src="xxx"
     *   with or without blanks on either side of "=", then "xxx" is a link. Only
     *   one link per tag is discovered.
     *
     * Each link is resolved against this page's URL and stored in absolute
     * form; links that do not form a valid URL are skipped. The set is built
     * on the first successful read and the same Set object is returned by
     * later calls. A failed read is not cached, so a later call tries again.
     *
     * @return the set of links, or null if the webpage cannot be read
     */
    public Set getLinks() {
        if (links != null) {  // The set of links is constructed only once.
            return links;
        }
        BufferedReader br= getReader();
        if (br == null) {
            return null;
        }

        // Collect into a local set so that the cache field is assigned only
        // after the page has been processed.
        Set found= new Set();
        try {
            // Add to found the links that are in BufferedReader br.
            LinkEnumeration le= new LinkEnumeration(br);
            while (le.hasMoreElements()) {
                String link= (String) le.nextElement();
                if (link == null) {
                    continue;
                }
                try {
                    URL linkURL= new URL(url, link);
                    found.add(linkURL.toString());
                } catch (MalformedURLException ignored) {
                    // Not a usable link: just skip it.
                }
            }
        } finally {
            // Release the underlying stream/connection in every case.
            try {
                br.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails.
            }
        }
        links= found;
        return links;
    }

    /**
     * = "this URL is bad" --the string given to the constructor could not
     *   be parsed as a URL. No attempt is made to load the page.
     */
    public boolean isBad() {
        return url == null;
    }

    /**
     * = a reader for URL url. Null is returned if url is null, if the
     *   protocol is not http or file, or if the URL cannot be opened.
     *   The caller is responsible for closing the returned reader.
     */
    private BufferedReader getReader() {
        if (url == null || !hasReadableProtocol()) {
            return null;
        }

        try {
            InputStream is= url.openStream();
            // NOTE(review): decodes with the platform default charset, as before.
            InputStreamReader isr= new InputStreamReader(is);
            return new BufferedReader(isr);
        } catch (IOException e) {
            return null;
        }
    }

    /** = "protocol is http or file" (ignoring case; false if protocol is null) */
    private boolean hasReadableProtocol() {
        return "http".equalsIgnoreCase(protocol)
            || "file".equalsIgnoreCase(protocol);
    }

    /** = "s ends with suffix, ignoring case" (false if s is null) */
    private static boolean endsWithIgnoreCase(String s, String suffix) {
        if (s == null || s.length() < suffix.length()) {
            return false;
        }
        int start= s.length() - suffix.length();
        return s.regionMatches(true, start, suffix, 0, suffix.length());
    }

    /**
     * = a String representation of this Webpage. For a bad URL the protocol,
     *   host and path print as "null" and the port prints as -1 (the value
     *   URL.getPort() uses for "no port set").
     */
    public String toString() {
        int port= (url == null) ? -1 : url.getPort();
        return "URL " + u
            + " Protocol: " + protocol
            + " Host: " + host
            + " Port: " + port
            + " Path: " + path;
    }

}