<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">import java.io.*;
import java.util.*;
import java.net.*;

/**  An instance produces a set of links on a web site. Also, during
     construction of this instance, any links on the web site that are 
     malformed or unreachable are printed on the Java console. */
public class LinkChecker {
    /** Prefix used when the prefix supplied to the constructor is not a
        well-formed URL. */
    private static final String DEFAULT_PREFIX= "http://www.cs.cornell.edu";

    // The root url of the web site, exactly as passed to the constructor
    private String startUrl;

    // The prefix of any url that is processed by this instance. It is null
    // only if construction stopped early because startUrl was malformed.
    private URL prefixString;
    private String prefixStringHost; // the host of prefix prefixString
    private String prefixStringFile; // the file of prefix prefixString

    // Variable urls contains the URLs (as Strings) of web pages that
    // (1) are reachable from startURL,
    // (2) have prefixString as a prefix,
    // (3) have protocol http or file, and
    // (4) have .html or .htm as the suffix
    // During construction of this instance, variable urls contains the set of
    // urls for pages that have been or are undergoing processing.
    // NOTE: Set is the project's own Set class (it is instantiated directly),
    // not the java.util.Set interface.
    private Set urls= new Set();

    private Set links= new Set(); // The links (as Strings) on the pages in urls


    /** Constructor: an instance contains
         (a) a set of webpage URLs that are reachable from startURL (which
             has to be a real URL) and whose URL has the prefix as a prefix, and
         (b) a set of good links on the pages in (a).
       Further, during construction of the instance, detected bad links are
       printed on the Java console.

       Strings startUrl and p should be well-formed, absolute URLs.
       If startUrl is malformed, a message is printed and the set of links
       stays empty.
       If attempting to make a URL out of p gives an error, a message is
       printed and the prefix "http://www.cs.cornell.edu" is used in place
       of p for everything that follows, including the prefix test below.
       If startUrl does not start with the prefix in effect, the set of good
       links is empty.

       @param startUrl the root url of the web site
       @param p        the prefix that every processed url must have
    */
    public LinkChecker(String startUrl, String p) {
        this.startUrl= startUrl;
        try {
            // Built only to validate startUrl; the URL object is not needed.
            new URL(startUrl);
        } catch (MalformedURLException e) {
            System.out.println("startUrl is not a valid url; " +
                                "the set of links will be empty ");
            return;
        }

        // The prefix, as a String, that is actually in effect: p itself when
        // p is well-formed, the default prefix otherwise.
        String effectivePrefix= p;
        try {
            prefixString= new URL(p);
        } catch (MalformedURLException e) {
            System.out.println("prefix is not a valid URL; instead, " +
                               DEFAULT_PREFIX + " is used");
            effectivePrefix= DEFAULT_PREFIX;
            try {
                prefixString= new URL(DEFAULT_PREFIX);
            } catch (MalformedURLException e1) {
                // The default is a constant, so this indicates a broken
                // runtime rather than bad input; fail loudly instead of
                // continuing with a null prefix.
                throw new IllegalStateException(
                    "default prefix " + DEFAULT_PREFIX + " is not a valid URL", e1);
            }
        }

        prefixStringHost= prefixString.getHost();
        prefixStringFile= prefixString.getFile();

        // If the prefix in effect is not a prefix of startUrl, return.
        // The test uses effectivePrefix rather than p because p may be null
        // or malformed at this point.
        if (!startUrl.startsWith(effectivePrefix)) {
            return;
        }

        Webpage wp= new Webpage(startUrl);
        links.add(wp.getStringUrl());
        addAllLinkedURLs(wp, null);
    }

    /**  = the links reachable from the root web page. The set returned is
         the internal set of this instance, not a copy. */
    public Set getLinks() {
        return links;
    }

    /** = "Links can be extracted from web page wp", i.e.
          (1) wp has the same host as prefixStringHost,
          (2) wp's file starts with prefixStringFile,
          (3) wp is not in urls, and
          (4) wp is an html page.
        FILL IN THE BODY OF THIS METHOD
        NOTE: not yet implemented; this stub returns false for every page.
      */
    private boolean shouldProcess(Webpage wp) {
        return false;
    }

    /** A link to wp appears on webpage onWp (if onWp is not null).
       The url for wp is not in set urls and has prefixString as a prefix.
       Add to urls all the urls for webpages that are reachable from wp
       (including wp) and that have prefixString as a prefix. Also, add to
       links all the links that are on these pages --not only .htm and .html
       files but all links. See class Webpage for a specification of "all links".
       Finally, if webpage wp cannot be read, for whatever reason, print an
       error message on the Java console.
       NOTE: not yet implemented; this stub currently does nothing.
     */
    private void addAllLinkedURLs(Webpage wp, Webpage onWp) {
    }
}
</pre></body></html>