/*
 * @author Art Munson
 * @version 1.0
 */

package shotgun;

import java.io.*;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.HashMap;

/**
 * Model sampler that uses an external list to set an arbitrary model
 * order and keeps the top p% of the models on that list (based on list
 * position).
 *
 * The external file is read once, in the constructor, and must contain
 * one model name per line.  Names may not contain spaces or tabs.
 */
class ExternalSampler implements ModelSampler
{
  /** Retry threshold: reading gives up once more than this many retries have failed. */
  private static final int MAX_RETRIES = 100;
  /** Pause between two attempts to read the external file, in milliseconds. */
  private static final long RETRY_DELAY_MS = 10000L;

  private double percent = 1.0;
  private String externalName = null;
  /** Model names (Strings) in the order they appear in the external file. */
  private ArrayList modelList = null;

  /**
   * Construct a model sampler that will load a subset percent of models.
   * Models are selected based on list position.
   *
   * Reading the file is retried (with a pause between attempts) when an
   * I/O error occurs.  If the file still cannot be read after the retries
   * are used up, an error is printed and the JVM exits with status -1.
   * If a line contains a space or a tab, an error is printed and the JVM
   * exits with status 1.
   *
   * @param percent The percent of models to use, as a fraction in [0,1].
   * @param filename Name of external file giving ordering of models.
   * @throws IllegalArgumentException if percent is NaN or outside [0,1].
   */
  public ExternalSampler(double percent, String filename)
  {
    // Written as a negated "in range" test so that NaN is rejected too
    // (every comparison involving NaN is false).
    if (!(percent >= 0.0 && percent <= 1.0)) {
      throw new IllegalArgumentException("percent not in range [0,1]: percent=" + percent);
    }

    this.percent = percent;
    externalName = filename;
    File orderFile = new File(filename);

    int tries = 0;
    boolean interrupted = false;

    while (modelList == null) {
      try {
        // Each attempt builds a brand new list, so a read that fails
        // half way through the file cannot leave partial (and later
        // duplicated) entries behind.
        modelList = readModelList(orderFile);
      }
      catch (IOException ioe) {
        // Allow for some retrying to make file reading robust in face
        // of brief network outages or high disk latencies.
        if (tries > MAX_RETRIES) {
          System.out.println("Error: problem with file " + filename);
          System.exit(-1);
        }
        else {
          try {
            Thread.sleep(RETRY_DELAY_MS);
          }
          catch (InterruptedException intexc) {
            // Keep retrying, but remember the request so the interrupt
            // status can be restored once loading is finished.
            interrupted = true;
          }
          ++tries;
        }
      }
    }

    if (interrupted) {
      Thread.currentThread().interrupt();
    }
  }

  /**
   * Reads all model names from the order file, one name per line.
   * Prints an error and exits the JVM with status 1 if a name is invalid.
   * @param orderFile The file to read.
   * @return The names in file order, as a list of Strings.
   * @throws IOException if the file cannot be opened, read or closed.
   */
  private static ArrayList readModelList(File orderFile) throws IOException
  {
    ArrayList names = new ArrayList();
    BufferedReader buffer = new BufferedReader(new FileReader(orderFile));
    try {
      String modelName = buffer.readLine();
      while (modelName != null) {
        if (!validModelName(modelName)) {
          System.out.println("Error: found model name with invalid format: '"+modelName+"'.");
          System.exit(1);
        }
        names.add(modelName);
        modelName = buffer.readLine();
      }
    }
    finally {
      // Always release the file handle, including when a read fails and
      // the caller is about to retry.
      buffer.close();
    }
    return names;
  }

  /**
   * Checks that the model name looks reasonable (syntactically).
   * @param modelName The name to check.
   * @return True if the name is valid.
   */
  private static boolean validModelName(String modelName)
  {
    // The name is valid if it does not contain a space or a tab.
    return (modelName.indexOf(' ') == -1)
      && (modelName.indexOf('\t') == -1);
  }

  /**********************************************************
   * ModelSampler implementation
   **********************************************************/

  /**
   * Selects the model files to keep, following the external list order.
   *
   * The number of models kept is the number of models in the first set
   * multiplied by percent, truncated to an integer.  The models kept are
   * the first ones named in the external list.  If the list is too short,
   * or names a model that is not present in the first set, an error is
   * printed and the JVM exits with status 1.
   *
   * @param modelFiles Model files indexed as [set][model].  The same
   *   model offset is used for every set, so the sets are assumed to be
   *   in the same model order.
   * @return A new [set][model] array holding only the selected models,
   *   in external list order.  Empty if modelFiles has no sets.
   */
  public File[][] prescreen(File[][] modelFiles)
  {
    int numSets = modelFiles.length;
    if (numSets == 0) {
      // Nothing to screen; also avoids reading modelFiles[0] below.
      return new File[0][];
    }

    // First create index of model names to offsets in the model files
    // list.  We'll use this for fast lookup later, so that each name is
    // resolved with a single hash lookup instead of a scan.
    int numModels = modelFiles[0].length;
    float loadFactor = 0.75f;
    int capacity = (int)(numModels / loadFactor);
    HashMap index = new HashMap(capacity, loadFactor);

    for (int i = 0; i < numModels; ++i) {
      index.put(LibraryLoader.getModelName(modelFiles[0][i]), new Integer(i));
    }

    // How many models do we want to keep?
    int count = (int)(numModels * percent);
    if (count > modelList.size()) {
      System.out.println("Error: external file " + externalName +
                         " does not list enough models to fill " + percent +
                         " sized subset.  A minimum of " + count +
                         " models are needed.");
      System.exit(1);
    }

    // Copy screened files, starting from the top of external list.
    File[][] screenedFiles = new File[numSets][count];

    for (int j = 0; j < count; ++j) {
      // Get the name of the j'th model in our list.
      String name = (String)modelList.get(j);
      // Find the offset of that model in the original list.
      Integer offset = (Integer)index.get(name);
      if (offset == null) {
        // Report the offending name instead of failing with a
        // NullPointerException.
        System.out.println("Error: model '" + name + "' listed in external file " +
                           externalName + " was not found among the model files.");
        System.exit(1);
      }
      int position = offset.intValue();
      // Copy the file pointers over from original list, for all sets.
      for (int i = 0; i < numSets; ++i) {
        screenedFiles[i][j] = modelFiles[i][position];
      }
    }

    // Verify in debug code that file names still synchronized across
    // the different sets.  If not, print the mismatch message.
    assert LibraryLoader.verifyFileNames(screenedFiles) == null
      : LibraryLoader.verifyFileNames(screenedFiles);

    return screenedFiles;
  }

  /**
   * Performs no filtering after the models are loaded.
   * @param models The loaded models.
   * @return The same array that was passed in.
   */
  public Model[] postscreen(Model[] models)
  {
    return models;
  }
}





