package LDMS::Table;
# Table.pm
#
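# Finds dash-delimited plain-text tables in a block of lines and marks them
# up with TABLE, TABLENAME, FIELD, FIELDNAME and DIVFIELD tags.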
#
# $Header: /home/LDMS/cvsroot/LDMS/LDMS/Table.pm,v 1.13 2000/12/02 17:47:18 jhj4 Exp $
#

#Author - Ju Joh (jhj4@cornell.edu)

use strict;
use warnings;
require 5.002;
use lib '..';
use LDMS::Error;  # LDMS error module.
use LDMS::XMLOut;
use LDMS::DataText;
use LDMS::Pre;

# Exporter set-up.  Done in a BEGIN block so that the export lists are in
# place before any code that imports from this module is compiled.
BEGIN {
    use Exporter ();

    # Inherit import() from Exporter.
    our @ISA = qw(Exporter);

    # Functions exported by default.
    our @EXPORT = qw(&markUpTable &getTokenPos);

    # Nothing is exported on request, and no export tags are defined.
    our @EXPORT_OK   = qw();
    our %EXPORT_TAGS = ( );

    # Declared for module version checking, but no version number is
    # assigned in this file.
    our $VERSION;
}
our @EXPORT_OK;

# Non-exported global variables.


# Initialize exported global variables.


# Initialize non-exported global variables.


# Create private global variables.
# These file-scoped lexicals are the working state shared by markUpTable
# and its helper functions.
my @tableBlock = ();  #Input lines of the block being processed.  Filled in
                      #  by markUpTable; @_ holds nothing useful at file
                      #  scope, so it starts out empty.
my $tablei = 0;  #Index of the current line within @tableBlock.
my @tableLineState = ();  #One integer per line of @tableBlock, recording
                          #  where that line sits relative to a table.
                          #  0  Outside any table (not started, or finished).
                          #  1  From the first long dash line through the
                          #     header lines.
                          #  2  From the second long dash line through the
                          #     data lines.
                          #  3 and up  Too many long dash lines; the block is
                          #     treated as malformed.
                          #  10 The line is part of the name of the next
                          #     table.
my @tableFields = ();  #Array of strings to keep track of the field headers.
my @tableFieldPos = (); #Array of positions for the beginning of each field.
my $guideField = ""; #Guide used to determine spacing.
                     #  NOTE(review): not referenced by the code visible in
                     #  this file -- confirm before removing.


# Forward declarations (with prototypes) of the functions defined below.
# Each prototype here must match the one on the corresponding definition.
# Note that the "&name(...)" call form used in this file bypasses prototype
# checking.
sub markUpTable(@);      # Entry point: takes the lines of a text block.
sub getFieldHeaders();   # Collects the header text of every field.
sub markUpFieldData();   # Emits the FIELD / DIVFIELD mark-up.
sub getTokenPos($, $);   # Takes a string and a delimiter pattern.
sub getTablePosInfo();   # Finalizes the field boundary positions.
# Module destructor.  Intentionally empty: nothing is cleaned up at exit.
END { }

#markUpTable(@)
#Finds the tables in a block of text lines, tags them, and hands everything
# (table or not) to the output helpers BeginTag, EndTag, processDataText and
# tagPre.
#
#A table is recognized by its dash lines:
#      optional title lines (indented 3 or more, shorter than 70 chars)
#   a line of 35 or more dashes
#      field header lines
#   a line of 35 or more dashes
#      field data lines
#   a line of 20 or more dashes (fewer than 35)
#
#Pre: The input is an array of text lines.
#     It must contain the name of the table, if there is one.
#     Also none of the fields in the table can overlap another field, and
#      a field must start at an identical position in every line.
#     Assumes that a line is 73 chars long.
#
#Post: The text will have been output with tables, if any, included.
#      If the dash lines do not describe complete tables, the whole block is
#      passed to tagPre instead.
#      Nothing is output for an empty input.
sub markUpTable(@)
{
   @tableBlock = @_;  #input
   @tableLineState = ();  #Drop the states left behind by an earlier call.

   if (scalar(@tableBlock) == 0)
   {
      return;
   }

   #Both checks are always run, so that the structure warning is printed
   # even when the first pass has already found a problem.
   my $gracefulFailure = _markTableLineStates();
   if (!_tableLayoutIsUsable())
   {
      $gracefulFailure = 1;
   }

   $tablei = 0;
   if ($gracefulFailure)
   {
      &tagPre(@tableBlock);
      return;
   }

   #Each line now has a state value assigned to it.  Walk the block and
   # output plain text and tables in order.
   my $lineCount = scalar(@tableBlock);
   while ($tablei < $lineCount)
   {
      my $iterationStart = $tablei;

      #Text in front of the next table.
      my $startNonTable = $tablei;
      while (($tablei < $lineCount) && $tableLineState[$tablei] == 0)
      {
         $tablei++;
      }
      &processDataText(@tableBlock[$startNonTable..($tablei-1)]);
      if ($tablei >= $lineCount)
      {  last;  }

      &BeginTag("TABLE");

      #Table name, if any.
      my $startTableNameInd = $tablei;
      while (($tablei < $lineCount) && $tableLineState[$tablei] == 10)
      {  $tablei++;  }
      if ($tablei > $startTableNameInd)
      {
         &BeginTag("TABLENAME");
         &processDataText(@tableBlock[$startTableNameInd..($tablei-1)]);
         &EndTag(); ##</TABLENAME>
      }
      if ($tablei >= $lineCount)
      {
         #A name is always followed by a dash line, so this should not
         # happen; close the table anyway to keep the tags balanced.
         &EndTag(); ##</TABLE>
         last;
      }

      if ($tableBlock[$tablei] =~ /^\s*(--|---)+\s*$/x)
      {
         $tablei++; #Skip the line of dashes.
      }

      &getFieldHeaders();
      &getTablePosInfo();
      &markUpFieldData();
      &EndTag(); ##</TABLE>

      #Safety net: if nothing at all was consumed, step over the current
      # line rather than looping forever on it.
      if ($tablei == $iterationStart)
      {
         $tablei++;
      }
   }#($tablei < $lineCount)

   return;
}

#_markTableLineStates
#First pass of markUpTable: fills @tableLineState for every line of
# @tableBlock.
#Returns 1 if the dash lines make no sense (a third long dash line, or a
# short dash line inside the header section), 0 otherwise.
sub _markTableLineStates
{
   my $failed = 0;

   for (my $i = 0; $i < scalar(@tableBlock); $i++)
   {
      #There is no previous line for the first line; treat it as "no table".
      my $previous = ($i > 0) ? $tableLineState[$i-1] : 0;

      if ($tableBlock[$i] =~ /^\s*-{35,}\s*$/x)
      {
         #A long dash line moves the table on to its next section.
         $tableLineState[$i] = $previous + 1;

         if ($tableLineState[$i] == 1)
         {
            #The lines just above could be the title of the table.
            _markTableTitle($i);
         }
         elsif ($tableLineState[$i] == 3)
         {
            #A normal table should only have two long dash lines and one
            # short dash line to end it.
            $failed = 1;
         }
      }
      elsif ($tableBlock[$i] =~ /^\s*-{20,}\s*$/x)
      {
         if ($previous == 0)
         {
            #This dash line is one used to demark text, not a table.
            $tableLineState[$i] = 0;
         }
         elsif ($previous == 2)
         {
            #Table has ended.
            $tableLineState[$i] = 0;
         }
         else
         {
            #This doesn't make any sense.  Keep the state defined so the
            # later passes can still read it, and fail gracefully.
            $tableLineState[$i] = $previous;
            $failed = 1;
         }
      }
      else
      {
         #Generally the state does not change from line to line.
         $tableLineState[$i] = $previous;
      }
   }

   return $failed;
}

#_markTableTitle
#Marks the contiguous run of lines directly above line $dashIndex that look
# like a table name: indented by 3 or more whitespace characters, currently
# marked as non-table, and shorter than 70 characters.
sub _markTableTitle
{
   my ($dashIndex) = @_;

   for (my $j = $dashIndex - 1; $j >= 0; $j--)
   {
      #A name has to be contiguous, so stop at the first line that
      # does not qualify.
      last unless (($tableBlock[$j] =~ /^\s{3,}.*$/x)
                   && ($tableLineState[$j] == 0)
                   && (length($tableBlock[$j]) < 70));
      $tableLineState[$j] = 10;
   }
   return;
}

#_tableLayoutIsUsable
#Second pass of markUpTable: checks that every table in @tableLineState has
# a header section followed by a data section.
#Prints a warning through printErrMsg and returns 0 at the first table that
# does not; returns 1 if all of them do.
sub _tableLayoutIsUsable
{
   my $lineCount = scalar(@tableBlock);
   my $i = 0;

   while ($i < $lineCount)
   {
      #Skip plain text, then any table name.
      while (($i < $lineCount) && $tableLineState[$i] == 0)
      {  $i++;  }
      while (($i < $lineCount) && $tableLineState[$i] == 10)
      {  $i++;  }
      if ($i >= $lineCount)
      {  last;  }

      if ($tableLineState[$i] != 1)
      {
         &printErrMsg("Warning: Table contains too few lines of dashes.\n Using Pre!");
         return 0;
      }

      #Header section.
      while (($i < $lineCount) && $tableLineState[$i] == 1)
      {  $i++;  }

      if (!(($i < $lineCount) && $tableLineState[$i] == 2))
      {
         &printErrMsg("Warning: Table does not contain any field data.\n Using Pre!");
         return 0;
      }

      #Rest of the table.
      while (($i < $lineCount) && $tableLineState[$i] != 0)
      {  $i++;  }
   }

   return 1;
}

##########
# getTokenPos
#Takes a string and a delimiter pattern, and returns an array of numbers
# holding the position of each token boundary found in the string.
#The pattern is applied repeatedly (with the /x and /g modifiers); for every
# match the position of the last character of the match is recorded.
#The final entry is then incremented by one.  When the last match is the
# empty match at the end of the string (as with the '$' alternative used
# in this module) the final entry is therefore the length of the string.
#Pre: The input is a single line, and the pattern is a valid regular
#     expression.
#Post: An array will be returned filled with delimiting position values.
#      The array is empty if the pattern does not match at all.

sub getTokenPos($, $)
{
   my ($inputString, $patternString) = @_;  #String to tokenize, and the
                                            # pattern that delimits it.
   my @posArray;  #Array for output.

   while ($inputString =~ m/$patternString/xg)
   {
      push(@posArray, pos($inputString) - 1);
   }

   #Only adjust the final entry when there is one; incrementing element -1
   # of an empty array is a fatal error.
   if (scalar(@posArray) > 0)
   {
      $posArray[-1]++;
   }
   return @posArray;

}

##########
# getFieldHeaders
#Works out the field columns from the first header line (a new field starts
# after 3 or more whitespace characters) and collects, for every field, the
# header text found in that column on each header line.
#Pre: Table Name has already been parsed and marked up.
#     $tablei is on the first header line, if there is one.
#     There are no EOLN chars in input.
#Post: @tableFields and @tableFieldPos are emptied and then, if there are
#      header lines, filled in.  The lines of a multi-line header are
#      joined with "\n".
#      $tablei is past the header lines and past the dash line that
#      follows them, if there is one.
sub getFieldHeaders()
{
   #Start clean so that a table without headers does not inherit the
   # headers and columns of the table before it.
   @tableFields = ();
   @tableFieldPos = ();

   my $lineCount = scalar(@tableBlock);

   if (($tablei < $lineCount) && $tableLineState[$tablei] == 1)
   {
      my $firstFieldLine = $tablei; #This is the line number where the
                                    # field header starts.

      #Find the line just past the last header line.
      my $headerEnd = $firstFieldLine;
      while (($headerEnd < $lineCount) && $tableLineState[$headerEnd] == 1)
      {  $headerEnd++;  }

      #The first header line is the guide to the field positions.
      @tableFieldPos = &getTokenPos($tableBlock[$firstFieldLine],
                                    '^(\s*)\S|(\s{3,}\S)|$');

      for (my $tableFieldInd = 0; $tableFieldInd < scalar(@tableFieldPos)-1;
           $tableFieldInd++)
      {
         my $startOfMatch = $tableFieldPos[$tableFieldInd];
         my $fieldEnd = $tableFieldPos[$tableFieldInd+1] - 1;
         my @headerLines;  #This field's text, one entry per header line.

         for (my $line = $firstFieldLine; $line < $headerEnd; $line++)
         {
            #The end of the cut is limited by the length of this line only;
            # a short line must not shorten the cut on the lines after it.
            my $endOfMatch = $fieldEnd;
            if ($endOfMatch > length($tableBlock[$line])-1)
            {   $endOfMatch = length($tableBlock[$line])-1;   }

            if ($startOfMatch <= $endOfMatch)
            {
               push(@headerLines, substr($tableBlock[$line], $startOfMatch,
                                         $endOfMatch-$startOfMatch+1));
            }
            else
            {  #The line does not reach this field; keep a blank line.
               push(@headerLines, "");
            }
         }

         #We use \n as the delimiter; the text is split on \n again
         # before it is sent to processDataText().
         $tableFields[$tableFieldInd] = join("\n", @headerLines);
      }

      #Always move past the header lines, even when no field was found
      # (for instance when the first header line is blank).
      $tablei = $headerEnd;
   }

   if (($tablei < $lineCount)
       && ($tableBlock[$tablei] =~ /^\s*(--|---)+\s*$/x))
   {
      $tablei++; #Skip the line of dashes.
   }
   return;
}


#########
# getTablePosInfo
# Finalizes the delimiting position of each field in @tableFieldPos.
#
# If the header already gave the positions, only the last boundary (the
# right-hand edge of the last field) is adjusted: it is widened to the
# assumed line width of 73, and left alone if it is already wider.
#
# Otherwise the positions are worked out from the data lines themselves:
# fields are found from left to right, each one starting at the left-most
# token (over all data lines) that begins after the end of the previous
# field.  The last entry is the largest end-of-line position seen.
#
# Pre: getFieldHeaders has been run. @tableBlock has been initialized.
#      $tablei is on the first data line of the table.
#      Also none of the fields in the table can overlap another field.
#      Assumes that a line is 73 chars long.
# Post: @tableFieldPos contains the field boundaries.  $tablei is unchanged.
sub getTablePosInfo()
{
   my $assumedLineWidth = 73;

   if (scalar(@tableFieldPos) > 0)
   {
      #There is a header, and our work is nearly done.  Simply keep the
      # position info calculated from the table headers.
      #This should be the case for most tables.
      #Never pull the boundary in: a header wider than the assumed width
      # would otherwise lose (part of) its last field.
      if ($tableFieldPos[-1] < $assumedLineWidth)
      {
         $tableFieldPos[-1] = $assumedLineWidth;
      }
      return;
   }

   #No header: go through the data lines and identify the start of each
   # field.  Strings need not necessarily be of the same line.
   my $lineCount = scalar(@tableBlock);
   my @fieldStarts;            #Positions found so far.
   my $previousFieldEnd = -1;  #End position of the field found last.
   my $widestLineEnd = -1;     #Largest end-of-line position seen.
   my $foundField = 1;         #True while the last pass found a field.

   while ($foundField)
   {
      my $bestStart;  #Left-most candidate start in this pass, if any.
      my $bestEnd;    #End of the field that goes with $bestStart.

      #A local line index is used so that $tablei still points at the
      # first data line when the function ends.
      for (my $line = $tablei;
           ($line < $lineCount) && $tableLineState[$line] == 2;
           $line++)
      {
         my @linePos = &getTokenPos($tableBlock[$line],
                                    '^(\s*)\S|(\s{3,}\S)|$');
         if (scalar(@linePos) == 0)
         {  next;  }

         #Skip the tokens that belong to fields already found.
         my $i = 0;
         while (($i < scalar(@linePos)) && $linePos[$i] <= $previousFieldEnd)
         {  $i++;  }

         #The last entry is the end of the line, which is not a field.
         if (($i < scalar(@linePos)-1)
             && (!defined($bestStart) || $linePos[$i] < $bestStart))
         {
            $bestStart = $linePos[$i];
            $bestEnd = $linePos[$i+1] - 1;
         }

         if ($linePos[-1] > $widestLineEnd)
         {
            $widestLineEnd = $linePos[-1];
         }
      }

      $foundField = defined($bestStart) ? 1 : 0;
      if ($foundField)
      {
         push(@fieldStarts, $bestStart);
         $previousFieldEnd = $bestEnd;
      }
      else
      {
         #The last value of @tableFieldPos is the largest eoln position.
         push(@fieldStarts, $widestLineEnd);
      }
   }

   #Export to global var.
   @tableFieldPos = @fieldStarts;
   return;
}

#########
# markUpFieldData
# Marks up the data of the table, one field (column) at a time.
# For every field a FIELD tag is opened, followed by a FIELDNAME element
# if a header is known for that field, and then one DIVFIELD element per
# entry of the field.  An entry starts on one data line and continues over
# the following data lines for as long as the field's column on those lines
# begins with a space or is not reached by the line at all.
#
# Pre: @tableBlock has been initialized.
#      $tablei is on the first data line of the table.
#      @tableFieldPos contains correct values.
# Post: DivField data has been marked up.
#       $tablei is past the data lines and past the dash line that ends the
#       table, if there is one.
#       Nothing is done if $tablei is not on a data line.

sub markUpFieldData()
{
   my $lineCount = scalar(@tableBlock);

   if (!(($tablei < $lineCount) && $tableLineState[$tablei] == 2))
   {
      return;
   }

   my $firstFieldLine = $tablei; #This is the line number where the
                                 # field data starts.

   #Cuts the text of field $fieldInd out of line $lineInd.
   #Returns undef if the line is too short to reach the field.
   my $cutField = sub {
      my ($lineInd, $fieldInd) = @_;
      my $startOfMatch = $tableFieldPos[$fieldInd];
      my $endOfMatch = $tableFieldPos[$fieldInd+1] - 1;

      #$endOfMatch should be at most the length of the line minus 1.
      if ($endOfMatch > length($tableBlock[$lineInd])-1)
      {   $endOfMatch = length($tableBlock[$lineInd])-1;   }

      if ($startOfMatch > $endOfMatch)
      {   return;   }
      return substr($tableBlock[$lineInd], $startOfMatch,
                    $endOfMatch-$startOfMatch+1);
   };

   for (my $tableFieldInd = 0; $tableFieldInd < scalar(@tableFieldPos)-1;
        $tableFieldInd++)
   {  #For each field.

      #Every field is read from the top of the data lines.
      $tablei = $firstFieldLine;

      &BeginTag("FIELD");
      if (scalar(@tableFields) > $tableFieldInd)
      {
         &BeginTag("FIELDNAME");
         &processDataText(split("\n",$tableFields[$tableFieldInd]));
         &EndTag();
      }

      while (($tablei < $lineCount) && $tableLineState[$tablei] == 2)
      {  #While there are more divField's

         my @divFieldData; #Lines of the current DivField.

         #The first line begins the field data.
         my $firstText = $cutField->($tablei, $tableFieldInd);
         push(@divFieldData, defined($firstText) ? $firstText : "");
         $tablei++;

         #The following lines belong to the same DivField while they are
         # indented within the field, or do not reach the field.
         while (($tablei < $lineCount) && $tableLineState[$tablei] == 2)
         {
            my $moreText = $cutField->($tablei, $tableFieldInd);
            if (!defined($moreText))
            {  #Append a blank line.
               push(@divFieldData, "");
            }
            elsif (substr($moreText, 0, 1) eq " ")
            {  #Add as an element to existing divField
               push(@divFieldData, $moreText);
            }
            else
            {  #Not indented: this line starts the next DivField.
               last;
            }
            $tablei++;
         }

         &BeginTag("DIVFIELD");
         &processDataText(@divFieldData);
         &EndTag(); #</DIVFIELD>
      }
      &EndTag("FIELD");  #</FIELD>
   }

   #Skip whatever data lines are left (all of them, if there were no
   # fields to mark up).
   while (($tablei < $lineCount) && $tableLineState[$tablei] == 2)
   {
      $tablei++;
   }
   if (($tablei < $lineCount)
       && ($tableBlock[$tablei] =~ /^\s*(--|---)+\s*$/x))
   {
      $tablei++; #Skip the line of dashes.
   }
   return;
}
1;
__END__;

=pod
