package LDMS::Xrefs;


##############
#Xrefs.pm
#
#The Xrefs module finds and tags cross-references to the
#US Code and to Public Law documents.
#handleXrefs is the exported function that identifies and tags
#cross-references in DATATEXT.
#
#
# $Header: /home/LDMS/cvsroot/LDMS/LDMS/Xrefs.pm,v 1.4 2000/12/01 20:02:47 jcl53 Exp $
#
#author:Sylvia Kwakye (sbk22@cornell.edu)
#
###

use strict;
use warnings;
use lib '..';
use LDMS::XMLOut;
use LDMS::State;
use LDMS::FootRef;
use LDMS::Error;
require 5.002;

# Compile-time setup: declare the Exporter-related package variables and
# fill them in before the rest of the file is compiled.
BEGIN {
    use Exporter ();
    our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

    # Set our version (for module version checking).
    # The digits of the CVS Revision keyword are collected into @r and
    # formatted as "<first>.<two-digit second>..." (e.g. 1.4 becomes 1.04).
    $VERSION = do {my @r = (q$Revision: 1.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r};

    # Inherit import() from Exporter.
    @ISA = qw(Exporter);

    # Exported functions: handleXrefs is the only name exported by default.
    @EXPORT = qw(&handleXrefs);
    %EXPORT_TAGS = ( );
      
    # Exported global variables and optional exported functions (none).
    @EXPORT_OK = qw();

}
# The "our" declaration inside BEGIN is scoped to that block, so @EXPORT_OK
# is declared again here for the rest of the file.
our @EXPORT_OK;

# Non-exported file-scoped variables.
# These lexicals are shared by the subs in this file instead of being
# passed as arguments.
  my @seq;           #current structural position, as returned by getSequence()
  my $index;         #number of elements in @seq
  my $titleNum;      #title number; NOTE(review): the target subs declare their own $titleNum - confirm this one is still used
  my $chapterNum;    #chapter number; NOTE(review): shadowed the same way - confirm
  my $secNum;        #section number; NOTE(review): shadowed the same way - confirm
  my $block;         #the DATATEXT being tagged: handleXrefs' arguments joined with newlines
  my $eachXref;      #the partially tagged xref currently being processed
  my @tokens;        #whitespace-separated words of $eachXref
  my $numTokens;     #number of elements in @tokens
  my $numXrefs;      #number of elements in @allXrefs
  my @allXrefs;      #every <XREF target=crossref> span found in $block
  my $sChapterNum;   #chapter number from the state machine: $seq[$index-2]
  my $sTitleNum;     #title number from the state machine: $seq[$index-1]
 
# Initialize exported global variables.
 

# Initialize non-exported global variables.


# Create private global variables.


# Forward declarations, so the subs can be called before their definitions.
# Only handleXrefs carries a prototype; it accepts a list.
sub handleXrefs(@);       #top-level function that calls the others
sub markXrefs;            #bookmarks the starts and ends of known xrefs
sub processListXrefs;     #processes list type xrefs
sub processXrefs;         #checks the syntax of each xref and calls the
                          #matching target function to extract its info
sub multiTarget1;         #processes plural type xrefs (title, chapter and sections)
sub multiTarget2;         #processes plural type xrefs (title and chapters)
sub multiTarget3;         #processes plural type xrefs (title and sections)
sub multiTarget4;         #processes plural type xrefs (chapter and sections)
sub multiTarget5;         #processes plural type xrefs (titles)
sub uscTargets;           #processes xrefs of the type 1 USC 112
sub pubTargets;           #processes public law xrefs
sub singleTarget;         #processes single type xrefs
sub writeToXML;           #sends output of handleXrefs to FootRef and XMLOut 

# Module destructor (intentionally empty).
END { } 


#####################################################################
#handleXrefs is a module for identifying and tagging up cross
#references in the processDataText function.

#Target convention
#section_chapter_title. Within each field, multiple references are
#separated with a . e.g. usc:1.4.5_2_10 = sections 1, 4 and 5, chapter
#2, title 10
#(NOTE: the target subs in this file append each number followed by ';'
#when they build these lists - confirm which separator consumers expect.)
#If any entry is 0 then that information was not available.
#The most important target info is the section and the title: section
#numbers are unique within a title, e.g. one can never have both
#chapter 1 section 4 and chapter 2 section 4.


#pre:argument is a block of text
#post:writes tagged block to XML output file
#####################################################################
sub handleXrefs(@){
  # Work on a private copy of the lines handed in by the caller.
  my @lines = @_;

  # Guard clause: when the first element is undefined there is no text to
  # scan, so the input goes straight to the footnote-reference markup.
  unless (defined $lines[0]) {
      &markupFootRefs(@lines);
      return;
  }

  # Record where we are in the document structure.  The last two entries
  # of the sequence are taken as the title and chapter numbers.
  @seq   = &getSequence();
  $index = @seq;
  ($sChapterNum, $sTitleNum) = ($seq[$index-2], $seq[$index-1]);

  # The tagging passes all operate on one newline-joined string.
  $block = join "\n", @lines;

  markXrefs();           # bookmark every recognised xref
  processListXrefs();    # tag list type xrefs
  processXrefs();        # fill in the target attribute of each bookmark
  writeToXML();          # hand the tagged block to the XML output stage
}

############################################################################
##markXrefs bookmarks all known starts and ends of us code and public
#law  cross references
#the patterns are separated by | in the function. The order of matching  
#is listed below (not all permutations for each pattern is shown).

#1) #match chapter x sections/chapters/sections of this title/chapter (plural type xref)
#2) #match chapter x sections/chapters/sections of title/chapter y (plural type xref)
#3) #match title x chapters/sections y1,y2 .. and yn (plural type xref)
#4) #match title/chapter x section y1,y2...and yn (plural type xref)
#5) #match titles d1, d2,.. (plural type xref)
#6) #match chapter x section/chapter/section of this title/chapter (single type xref)
#7) #match chapter x section/chapter/section of title/chapter y (single type)
#8) #match title x chapter/section y d/d(i)/di(i)(x) etc. (single type)
#9) #match title/chapter x section y (single type)
#10)#match title d1, d2,.. (single type)
#11)#match xrefs of the form 1 U.S.C. sec./sections 211,212, ...
#12)#match xrefs of the form 1 U.S.C. chapters 2, 4, ..
#13)#match xrefs of the form 1 U.S.C. sec./section 211
#14)#match xrefs of the form 1 U.S.C. chapter 2
#15)#match pub l. d1-d2 sections d3, d4 .. or pub l. d1-d2 sections d3 d4.. 
#16)#match sections d1, d2, .. of pub.l. d1-d2 or section d, d2..pub. l. d1-d2 
#17)#match pub l. d1-d2 section d3 or pub l. d1-d2 section d3
#18)#match section d of pub.l. d1-d2 or section d,pub. l. d1-d2 

#pre: requires an already defined a block of text (string)
#post: xrefs are marked with tags in block. 

############################################################################
sub markXrefs{
  # Wraps every recognised US Code / Public Law citation found in the
  # shared $block in <XREF target=crossref>...</XREF>.  The placeholder
  # target "crossref" is replaced later by the target subs.
  #
  # Reads and modifies: $block.  Takes no arguments.
  # Returns: the result of the substitution (number of replacements).
  #
  # The alternatives are tried in the order listed in the header comment
  # above this sub; the first one that matches at a position wins.
  #
  # Fix: in the third alternative the first "and <number>" form was written
  # "d+" (literal letter d) instead of "\d+", so ranges such as
  # "and 12(a)-(b)" could not match there.  The fourth alternative already
  # used "\d+".
  #
  # NOTE(review): "\l" in the look-aheads is Perl's lowercase-next-character
  # escape, not a literal letter l; "l\." was presumably intended.  Left
  # unchanged here - confirm before editing.
  # NOTE(review): $stat is interpolated unescaped, so its "." matches any
  # character - confirm whether a literal dot was meant.
  my $and = "and";
  my $stat = "Stat.";

  $block =~ s{(
               ((chapter\s+\d+.*?sections|chapters|sections.*?\d+.*?chapter|sections)
                \s+.*?\d+.*?(?!$stat|pub\.\s+\l\.)\s+of\s+this\s+(title|chapter))
               |((chapter\s+\d+,*?sections|chapters|sections.*?\d+.*?chapter|
                  sections.*?\d+.*?chapter|sections).*?\d+.*?\s+.*?(title|chapter)\s+(\d+|[IVXL]*))
               |(title\s+(\d+|[IVXL]*),*.*?(?!$stat|pub\.\s+\l\.|act|title)
                 \s+(chapter\s+\d+,*?\s+sections|chapters|sections).*?
                (;|\.|$and\s+(\d+\(*\w\)*-\(*\w\)*|\d+\(*\w\)*\(*\d+\)*|\d+\(*\w\)*|\d+)))
               |((title|chapter)\s+(\d+|[IVXL]*),*.*?(?!$stat|pub\.\s+\l\.|act)
                 \s+sections.*?\d+.*?(;|\.|$and\s+
                 (\d+\(*\w\)*-\(*\w\)*|\d+\(*\w\)*\(*\d+\)*|\d+\(*\w\)*|\d+)))
               |(titles((\s+(\d+|[IVXL]*)),*)+)
               |((chapter\s+\d+.*?\(*sec(\.|tion)|sec(\.|tion)\s+\d+.*?chapter|
                  chapter|\(*sec(\.|tion))\s+\d+.*?(?!$stat|pub\.\s+\l\.)
                 \s+of\s+this\s+(title|chapter))
               |((chapter\s+\d+.*?\(*sec(\.|tion)|sec(\.|tion)\s+\d+.*?chapter|
                  chapter|\(*sec(\.|tion))\s+\d+.*?(?!$stat|pub\.\s+\l\.)
                 \s+(title|chapter)\s+(\d+|[IVXL]*))
               |(title\s+(\d+|[IVXL]*),*\s+.*?(?!$stat|pub\.\s+\l\.|act)\s+
                 (chapter\s+\d+.*?sec(\.|tion)|sec(\.|tion)\s+\d+.*?chapter|
                  chapter|sec(\.|tion))\s+(\d+\(*\w\)*-\(*\d+\)*|\d+\(\w\)|\d+\w|\d+))
               |((title|chapter)\s+(\d+|[IVXL]*),*\s+.*?(?!$stat|pub\.\s+\l\.|act)
                 \s+sec(\.|tion)\s+(\d+\(*\w\)*-\(*\d+\)*|\d+\(*\w\)*|\d+))
               |(title\s+(\d+|[IVXL])\b)
               |(\d+\s+U\.*S\.*C\.*\s+(sec(\.|tions))*(\s+(\d+-\d+|\d+\w*),*)+)
               |(\d+\s+U\.*S\.*C\.*\s+chapters(\s+(\d+-\d+|\d+\w*),*)+)
               |(\d+\s+U\.*S\.*C\.*\s+chapter\s+(\d+-\d+|\d+\w*))
               |(\d+\s+U\.*S\.*C\.*(\s+sec(\.|tion))*\s+(\d+-\d+|\d+\w*))
               |((pub\.\s+L\.|public\s+law)\s+\d+-\d+,*\s+sec(\.|tions)
                 ((\s+\d+\(*\w\)*-\(*\w\)*|\d+\(*\w\)*\(*\d+\)*|\d+\(*\w\)*|\d+),*)+)
               |(sec(\.|tion)(\s+(\d+\(*\w\)*-\(*\w\)*|\d+\(*\w\)*\(*\d+\)*|\d+\(*\w\)*|\d+),*)+
                 (\s+of|,)*\s+(pub\.\s+L\.|public\s+law)\s+\d+-\d+)
               |((pub\.\s+L\.|public\s+law)\s+\d+-\d+,*\s+
                 (sec(\.|tion)\s+(\d+\(*\w\)*-\(*\w\)*|\d+\(*\w\)*\(*\d+\)*|\d+\(*\w\)*|\d+))*)
               |(sec(\.|tion)\s+(\d+\(*\w\)*-\(*\w\)*|\d+\(*\w\)*\(*\d+\)*|\d+\(*\w\)*|\d+)
                 (\s+of|,)*\s+(pub\.\s+L\.|public\s+law)\s+\d+-\d+)
               )
             }{<XREF target=crossref>$1<\/XREF>}xgim;

}

#############################################################################
#References sometimes span pages with the title on top and a list of 
#sections- title d1:sections d1,d2 ..;sections d3 .. (list type xref)
#This is where one is also likely to find footnotes.

#pre:requires bookmarked block string
#post:block is completely tagged with xref tag and attribute(target)

#ListXrefs not only matches information needed to set a target that 
#references a listXref.
############################################################################# 

sub processListXrefs{
  # Bookmarks list type xrefs ("following provisions of title N: ...
  # section ...;") in the shared $block with
  # <XREF target=list_0_N>...</XREF>, where N is the title number.
  #
  # Reads and modifies: $block.  Takes no arguments, returns nothing.
  #
  # Fix: the lead-in "following provisions of " is part of the match but
  # was not part of the replacement, so it was deleted from the block.
  # It is now captured and written back in front of the tag.
  # NOTE(review): the trailing "(n)" lines matched by the last group are
  # still consumed without being re-emitted, as before - confirm whether
  # dropping them is intended.
  $block =~ s{(following\s+provisions\s+of\s+)
              ([tT]itle\s+(\d+):\s+.*[Ss]ection.*;)(?:\n\(\d+\))*
             }{$1<XREF target=list_0_$3>$2<\/XREF>}xgms;

  #put all listxrefs found into an array
  my @listXrefs = ($block =~ /(<XREF\s+target=list.*>.*?<\/XREF>)/gims);

  # Work out a section_chapter_title style target for each list xref.
  # The statements that would write the target back into $block are
  # commented out, so this loop currently has no effect on $block.
  foreach my $listXref (@listXrefs){

      #extract the title number; skip anything without one
      next unless $listXref =~ /title\s+(\d+)/i;
      my $titleNum = $1;

      #initialize a new xref
      my $newXref = $listXref;

      #get the words in the listxref
      my @Tokens = split(/\s+/,$listXref);

      #extract the section numbers from the word array.  The list is
      #started afresh for every xref so numbers do not carry over from
      #the previous one.
      my $secNums = "";
      for my $i (2 .. $#Tokens){
	  if($Tokens[$i] =~ /\d+/ && $Tokens[$i-1] !~ /footnote/i)
	    {
	      $secNums .="$Tokens[$i];";
	    }
      }

      #set target to title, chapter and section numbers; strip every
      #blank and comma, as the other target subs do
      my $target = $secNums."_0_".$titleNum;
      $target =~ s/(\s+|,)//g;

      #pattern for the placeholder target the new one will be replacing
      my $curTarget = qr/list_0_\d+/;

      #update the block (disabled).  If re-enabled, the xref text must be
      #quoted because it contains regex metacharacters.
      # $newXref  =~ s/$curTarget/$target/;
      # $block =~ s/\Q$listXref\E/$newXref/;

      # @diagnostics
      # print $block;
      # print "title:$titleNum\n";
      # print "SECNUM:$secNums\n";
      # print "$newXref\n";
  }
  return;
}


##########################################################################
#References occur in a variety of combinations. processXrefs calls
#several functions that use a variety of patterns to locate the xrefs
#and point them to a target. Targets are of the form usc:section_chapter_title.
#Multiple targets in one field are separated by ; e.g. usc:1;2;4;5_10_50
#refers to US Code title 50, chapter 10, sections 1, 2, 4 and 5.

#pre: A block of partially tagged text.
#post: Block is completely tagged with tags and attributes.
#########################################################################
sub processXrefs{
   # Collects every <XREF target=crossref> bookmark in the shared $block
   # and hands each one to the target sub that matches its wording.
   #
   # Reads: $block.
   # Sets (for the target subs): $eachXref, @tokens, $numTokens, and also
   # @allXrefs and $numXrefs.
   # Takes no arguments, returns nothing.
   #
   # Fix: the loop used to iterate with "foreach $eachXref (...)".  A
   # foreach loop localizes its iterator, so the target subs, which refer
   # to the file-scoped $eachXref, did not see the per-iteration value.
   # The loop now has its own iterator and copies it into the shared
   # variable before dispatching.

   @allXrefs = ();          #array of all found xrefs
   $eachXref = "";          #the xref being processed
   @tokens = ();            #words of the xref being processed
   $numTokens = 0;          #number of tokens in @tokens

   #Get all the xrefs in the block
   @allXrefs = ($block =~ /<XREF\s+target=crossref>.*?<\/XREF>/gims);
   $numXrefs = scalar(@allXrefs); #number of xrefs found

   #look at the pattern of each xref and assign it to the appropriate
   #function for processing
   foreach my $xref (@allXrefs){
       $eachXref  = $xref;
       @tokens    = split(/\s+/,$eachXref);
       $numTokens = scalar(@tokens);

       #get pattern 1 Xrefs: title, chapter and sections
       if($eachXref =~ /title/i && $eachXref =~ /chapter/i && $eachXref =~ /sections/i)
       {
	   multiTarget1();
       }
       #get pattern 2 Xrefs: title and chapters, no section
       elsif($eachXref =~ /title/i && $eachXref =~ /chapters/i && $eachXref !~ /section/i)
       {
	   multiTarget2();
       }
       #get pattern 3 Xrefs: title and sections, no chapter
       elsif($eachXref =~ /title/i && $eachXref !~ /chapter/i && $eachXref =~ /sections/i)
       {
	   multiTarget3();
       }
       #get pattern 4 Xrefs: chapter and sections, no title
       elsif($eachXref !~ /title/i && $eachXref =~ /chapter/i && $eachXref =~ /sections/i)
       {
	   multiTarget4();
       }
       #get pattern 5 Xrefs: titles
       elsif($eachXref =~ /titles/i)
       {
	   multiTarget5();
       }
       #get 1 USC 112 type xrefs
       elsif($eachXref =~ /USC|U\.S\.C\./)
       {
	   uscTargets();
       }
       #get public law patterns
       elsif($eachXref =~ /Pub\.\s+L\.|Public\s+Law/i)
       {
	   pubTargets();
       }
       #get single type xref patterns 6-10
       else
       {
	   singleTarget();
       }
   }
   return;
}






##############################################################################
#This function matches Public Law references and provides target information
#to complete the tagging begun in markXrefs().
#
#PRE: A bookmarked block must be available to operate on.
#POST: The block is completely tagged with reference to Pub. L. 
#
#
#Example,Pub. L. 104-199, Sec. 1(a) is tagged as
#<XREF target=pl:1(a)_104-199>,Pub. L. 104_199, Sec. 1(a)</XREF>
#If there are no sections specified, the section field is 0
############################################################################# 

sub pubTargets{
    # Builds the target for a Public Law xref and writes it into $block.
    #
    # Reads:  $eachXref (the bookmarked xref), $block.
    # Writes: $block - the "crossref" placeholder of this xref becomes
    #         pl:<sections>_<chapter>_<law number>; the chapter field is
    #         always 0 here.
    # Takes no arguments, returns nothing.
    #
    # Fixes:
    #  - the xref text is quoted (\Q..\E) when it is looked up in $block;
    #    unquoted, its "." and "(" ")" were treated as regex syntax
    #  - captures are only read after a successful match, so a stale $1
    #    (for example "tion") can no longer end up in the target
    #  - for "sections" lists only the numbers after the word "sections"
    #    are collected; the public law number is no longer counted as a
    #    section and the last section is no longer dropped
    my $chapterNum = 0;            #chapter number
    my $pubNum = 0;                #publication number
    my $secNum = 0;                #section numbers
    my $target = "";               #string to store xref target
    my $newXref = $eachXref;       #make copy of xref

    #nothing to do without an xref; an empty pattern in the substitution
    #below would reuse the last successful pattern instead
    return unless defined $eachXref && length $eachXref;

    #check first to see if there is an uneven number of brackets.
    #such instances are not processed.
    my $openCount  = () = $eachXref =~ /\(/g;
    my $closeCount = () = $eachXref =~ /\)/g;
    return if $openCount != $closeCount;

    #get the publication number
    if($eachXref =~ /(?:pub\.\s+l\.|public\s+law)\s+(\d+-\d+)/i){
	$pubNum = $1;
    }

    if($eachXref =~ /sections/i){
	#more than one section: walk the words between the tags and keep
	#every number that follows the word "sections"
	my ($text) = $eachXref =~ m{<XREF[^>]*>(.*?)</XREF>}is;
	$text = $eachXref unless defined $text;

	my $list = "";
	my $collecting = 0;
	foreach my $word (split(/\s+/,$text)){
	    if(!$collecting){
		$collecting = 1 if $word =~ /sections/i;
		next;
	    }
	    #stop when the citation of the law itself begins
	    last if $word =~ /^(?:of|pub\.|public)$/i;
	    $list .= "$word;" if $word =~ /\d/;
	}
	$secNum = $list if length $list;
    }
    elsif($eachXref =~ /(?:sec\.|section)\s+(.*?)(?:\s+|<)/i){
	#a single section
	$secNum = $1;
    }

    #set target to section, chapter and publication numbers
    $target = "pl:".$secNum."_".$chapterNum."_".$pubNum;
    $target =~ s/(\s+|,)//g;

    #update the Xref to include the target
    $newXref =~ s/crossref/$target/;
    $block =~ s/\Q$eachXref\E/$newXref/;
    return;
}
  


sub uscTargets{
  # Builds the target for xrefs of the form "1 U.S.C. 112",
  # "1 U.S.C. sections 211, 212" or "1 U.S.C. chapter(s) 2, 4" and writes
  # it into $block.
  #
  # Reads:  $eachXref (the bookmarked xref), $block.
  # Writes: $block - the "crossref" placeholder of this xref becomes
  #         usc:<sections>_<chapters>_<title>.  One number is written as
  #         is; several numbers are written as "n1;n2;...".
  # Takes no arguments, returns nothing.
  #
  # Fixes:
  #  - the test that chose between the single and the list branch was
  #    inverted: real lists took the single branch (only the last number
  #    was kept) and a single number after "sections"/"chapters" took the
  #    list branch, where the closing "</XREF>" leaked into the target
  #  - the xref text is quoted (\Q..\E) when it is looked up in $block
  my $chapterNum = 0;            #chapter number(s)
  my $titleNum = 0;              #title number
  my $secNum = 0;                #section number(s)
  my $target = "";               #string to store xref target
  my $newXref = $eachXref;       #make copy of xref

  #nothing to do without an xref; an empty pattern in the substitution
  #below would reuse the last successful pattern instead
  return unless defined $eachXref && length $eachXref;

  #check first to see if there is an uneven number of brackets.
  #such instances are not processed.
  my $openCount  = () = $eachXref =~ /\(/g;
  my $closeCount = () = $eachXref =~ /\)/g;
  return if $openCount != $closeCount;

  #work on the text between the tags so no markup reaches the target
  my ($text) = $eachXref =~ m{<XREF[^>]*>(.*?)</XREF>}is;
  $text = $eachXref unless defined $text;

  #extract title number: the number in front of "U.S.C."
  if($text =~ /(\d+)\s+U\.*S\.*C/i){
      $titleNum = $1;
  }

  #everything after "U.S.C." names the sections or chapters
  my ($rest) = $text =~ /U\.*S\.*C\.*(.*)$/is;
  $rest = "" unless defined $rest;

  #keep the words that carry a number
  my @nums = grep { /\d/ } split(/\s+/,$rest);

  my $field = 0;
  if(@nums == 1){
      $field = $nums[0];
  }
  elsif(@nums > 1){
      $field = join("", map { "$_;" } @nums);
  }

  #the numbers are chapters if the xref says so, sections otherwise
  if($rest =~ /chapter/i){
      $chapterNum = $field;
  }
  else{
      $secNum = $field;
  }

  #set target to section, chapter and title numbers
  $target = "usc:".$secNum."_".$chapterNum."_".$titleNum;
  $target =~ s/(\s+|,)//g;

  #update the Xref to include the target
  $newXref =~ s/crossref/$target/;
  $block =~ s/\Q$eachXref\E/$newXref/;
  return;
}



###############################################################################
##singleTarget looks for chapter section and title combinations and extracts the 
#necessary information to creat a target for an XREF tag
#
#Pre:Requires a block with partially tagged cross reference to operate on.
#Post:Completes the tagging by updating the target attributes of the xref
##############################################################################
sub singleTarget{
  # Builds the target for a single type xref (one section and/or chapter,
  # optionally with a title) and writes it into $block.
  #
  # Reads:  $eachXref, $sTitleNum, $sChapterNum, $block.
  # Writes: $block - the "crossref" placeholder of this xref becomes
  #         usc:<section>_<chapter>_<title>.
  # Takes no arguments, returns nothing.
  #
  # Fixes:
  #  - the xref text is quoted (\Q..\E) when it is looked up in $block
  #  - a section number that is the last word of the xref keeps its
  #    suffix ("5(a)" used to be cut down to "5")
  #  - a "title" that is not followed by a number or numeral no longer
  #    produces an empty title; the state machine title is used instead
  my $chapterNum = 0;            #chapter number
  my $titleNum = $sTitleNum;     #title number, defaults to the current title
  my $secNum = 0;                #section number
  my $target = "single";         #string to store xref target
  my $newXref = $eachXref;       #make copy of xref

  #nothing to do without an xref; an empty pattern in the substitution
  #below would reuse the last successful pattern instead
  return unless defined $eachXref && length $eachXref;

  #check first to see if there is an uneven number of brackets.
  #such instances are not processed.
  my $openCount  = () = $eachXref =~ /\(/g;
  my $closeCount = () = $eachXref =~ /\)/g;
  return if $openCount != $closeCount;

  #extract title number; "of this title" keeps the current title
  if($eachXref !~ /of\s+this\s+title/i
     && $eachXref =~ /title\s+(\d+|[IVXL]+\b)/i)
    {
      $titleNum = $1;
    }

  #extract chapter number; an explicit number wins over "this chapter"
  if($eachXref =~ /of\s+this\s+chapter/i)
    {
      $chapterNum = $sChapterNum;
    }
  if($eachXref =~ /chapter\s+(\d+)/i)
    {
      $chapterNum = $1;
    }

  #extract section number: a digit plus whatever follows it up to the
  #next blank or the closing tag
  if($eachXref =~ /sec(?:\.|tion)\s+(\d[^\s<]*)/i)
    {
      $secNum = $1;
    }

  #set target to section, chapter and title numbers
  $target = "usc:".$secNum."_".$chapterNum."_".$titleNum;
  $target =~ s/(\s+|,)//g;

  #update the Xref to include the target
  $newXref =~ s/crossref/$target/;
  $block =~ s/\Q$eachXref\E/$newXref/;
  return;
}


###############################################################################
##multiTarget1 looks for chapter, section and title combinations and extracts the
#necessary information to create a target for an XREF tag
#
#Pre:Requires a partially tagged cross reference to operate on.
#Post:Completes the tagging by updating the target attributes of the xref
##############################################################################
sub multiTarget1{
  # Builds the target for a plural xref that names a title, a chapter and
  # several sections, and writes it into $block.  The first word of the
  # xref ($tokens[1] holds the end of the opening tag plus that word)
  # decides in which order the three parts are read.
  #
  # Reads:  $eachXref, @tokens, $numTokens, $sTitleNum, $block.
  # Writes: $block - the "crossref" placeholder of this xref becomes
  #         usc:<sections>_<chapter>_<title>.
  # Takes no arguments, returns nothing.
  #
  # Fixes:
  #  - the xref text is quoted (\Q..\E) when it is looked up in $block
  #  - the title number of a title-first xref was matched with
  #    /(\d+|[IVXL]*)/, which succeeds with an empty match at the very
  #    start of the string; it is now taken from the word after "title"
  #  - captures are only used after a successful match; otherwise the
  #    defaults are kept
  #  - the chapter match of a section-first xref ignores case like the
  #    other chapter matches
  # NOTE(review): the title-first branch appends the last number as
  # ".<n>" after a list built with ";" - kept as found, confirm the
  # separator expected downstream.
  my $secNums = "";              #list of section numbers
  my $target = "";               #string to store xref target
  my $lastNum = 0;               #last number in a reference.
  my $leftOver = "XREF>";        #whatever is after lastNum
  my $newXref = $eachXref;       #make copy of xref
  my $chapterNum = 0;            #chapter number
  my $titleNum = $sTitleNum;     #title number, defaults to the current title
  my $and = "and";

  #nothing to do without an xref; an empty pattern in the substitutions
  #below would reuse the last successful pattern instead
  return unless defined $eachXref && length $eachXref;

  #check first to see if there is an uneven number of brackets.
  #such instances are not processed.
  my $openCount  = () = $eachXref =~ /\(/g;
  my $closeCount = () = $eachXref =~ /\)/g;
  return if $openCount != $closeCount;

  ######## title chapter section order ###########

  #check for xref starting with the keyword title
  if($tokens[1] =~ /title/i){

    #extract title number: the number or numeral after "title"
    if($eachXref =~ /title\s+(\d+|[IVXL]+\b)/i){
      $titleNum = $1;
    }

    #extract chapter number: first number after "chapter"
    if($eachXref =~ /chapter.*?(\d+)/i){
      $chapterNum = $1;
    }

    #find the last number in the list of sections
    ($lastNum,$leftOver) = split(/</,$tokens[$numTokens-1]);

    #skip everything up to the word "sections"
    my $i = 0;
    for($i=0;$i<$numTokens;$i++)
      {
	if($tokens[$i] =~ /sections/i){last;}
      }
    #from now on store the words as section numbers
    for(my $n=$i+1;$n<$numTokens;$n++)
      {
	if($tokens[$n] !~ /$and/i && $tokens[$n] !~ /$leftOver/i && $tokens[$n] !~ /act/i)
	  {
	    $secNums .= "$tokens[$n];";        #build list
	  }
	if($tokens[$n] =~ /$leftOver/i)
	  {
	    $secNums .= ".$lastNum";           #add last number.
	  }
      }

    #set target to section, chapter and title numbers
    $target = "usc:".$secNums."_".$chapterNum."_".$titleNum;
    $target =~ s/,//g;

    #update the Xref to include the target
    $newXref =~ s/crossref/$target/;
    $block =~ s/\Q$eachXref\E/$newXref/;
  }

  ######### chapter sections title order ##########

  #check for xref starting with the keyword chapter
  if($tokens[1] =~ /chapter/i){

    #extract chapter number: first number in the xref
    if($eachXref =~ /(\d+)/){
      $chapterNum = $1;
    }

    #extract title number: the last word, unless that word is "title"
    #(as in "of this title"), in which case the default is kept
    if($tokens[$numTokens-1] !~ /title/i)
      {
	($titleNum,$leftOver) = split(/</,$tokens[$numTokens-1]);
      }

    #skip everything up to the word "sections"
    my $i = 0;
    for($i=0;$i<$numTokens;$i++)
      {
	if($tokens[$i] =~ /sections/i){last;}
      }
    #collect section numbers until "title" or "act" is reached
    for(my $n=$i+1;$n<$numTokens;$n++)
      {
	if($tokens[$n] =~ /\d+/i && $tokens[$n] !~ /title/i && $tokens[$n] !~ /act/i)
	  {
	    $secNums .= "$tokens[$n];";       #build list
	  }
	if($tokens[$n] =~ /title/i || $tokens[$n] =~ /act/i)
	  {
	    last;
	  }
      }

    #set target to section, chapter and title numbers
    $target = "usc:".$secNums."_".$chapterNum."_".$titleNum;
    $target =~ s/,//g;

    #update the block with target info
    $newXref =~ s/crossref/$target/;
    $block =~ s/\Q$eachXref\E/$newXref/;
  }

  ############ sections chapter title order ##################

  #check for xref starting with the keyword section
  if($tokens[1] =~ /section/i){

    #extract chapter number: first number after "chapter"
    if($eachXref =~ /chapter.*?(\d+)/i){
      $chapterNum = $1;
    }

    #extract title number: the last word, unless that word is "title"
    if($tokens[$numTokens-1] !~ /title/i)
      {
	($titleNum,$leftOver) = split(/</,$tokens[$numTokens-1]);
      }

    #collect section numbers until "chapter" is reached
    for(my $n=2;$n<$numTokens;$n++)
      {
	if($tokens[$n] =~ /\d+/  && $tokens[$n] !~ /chapter/i)
	  {
	    $secNums .= "$tokens[$n];";      #build list
	  }
	if($tokens[$n] =~ /chapter/i)
	  {
	    last;
	  }
      }

    #set target to section, chapter and title numbers
    $target = "usc:".$secNums."_".$chapterNum."_".$titleNum;
    $target =~ s/,//g;

    #update the block with target info
    $newXref =~ s/crossref/$target/;
    $block =~ s/\Q$eachXref\E/$newXref/;

  }#end of chapter,title,section check

  return;
}
#################################################################################
##multiTarget2 looks for chapter sections combinations and extracts the 
#necessary information to creat a target for an XREF tag
#
#Pre:Requires a partially cross reference to operate on.
#Post:Completes the tagging by updating the target attributes of the xref
################################################################################     

sub multiTarget2{
  # Builds the target for a plural xref that names a title and several
  # chapters (no sections) and writes it into $block.
  #
  # Reads:  $eachXref, @tokens, $numTokens, $sTitleNum, $block.
  # Writes: $block - the "crossref" placeholder of this xref becomes
  #         usc:0_<chapters>_<title>.
  # Takes no arguments, returns nothing.
  #
  # Fixes:
  #  - the xref text is quoted (\Q..\E) when it is looked up in $block
  #  - the title number of a title-first xref was matched with
  #    /(\d+)|[IVXL]*/, which succeeds with an empty match at the very
  #    start of the string and leaves the capture undefined; it is now
  #    taken from the word after "title"
  #  - in a title-first xref the chapter list starts after the word
  #    "chapters", so the title number is no longer counted as a chapter
  # NOTE(review): the title-first branch appends the last number as
  # ".<n>" after a list built with ";" - kept as found, confirm the
  # separator expected downstream.
  my $secNums = 0;               #section field, always 0 here
  my $target = "";               #string to store xref target
  my $lastNum = 0;               #last number in a reference.
  my $leftOver = "XREF>";        #whatever is after lastNum
  my $newXref = $eachXref;       #make copy of xref
  my $chapterNums = "";          #list of chapter numbers
  my $titleNum = $sTitleNum;     #title number, defaults to the current title

  #nothing to do without an xref; an empty pattern in the substitutions
  #below would reuse the last successful pattern instead
  return unless defined $eachXref && length $eachXref;

  #check first to see if there is an uneven number of brackets.
  #such instances are not processed.
  my $openCount  = () = $eachXref =~ /\(/g;
  my $closeCount = () = $eachXref =~ /\)/g;
  return if $openCount != $closeCount;

  #check for xref starting with the keyword chapter
  if($tokens[1] =~ /chapter/i){

    #extract title number: the last word, unless that word is "title"
    #(as in "of this title"), in which case the default is kept
    if($tokens[$numTokens-1] !~ /title/i)
      {
	($titleNum,$leftOver) = split(/</,$tokens[$numTokens-1]);
      }

    #collect chapter numbers until "title" or the closing tag is reached
    for(my $n=2;$n<$numTokens;$n++)
      {
	if($tokens[$n] =~ /\d+/  && $tokens[$n] !~ /title/i && $tokens[$n] !~ /$leftOver/i)
	  {
	    $chapterNums .= "$tokens[$n];";      #build list of numbers
	  }
	if($tokens[$n] =~ /title/i || $tokens[$n] =~ /$leftOver/i)
	  {
	    last;
	  }
      }

    #set target to section, chapter and title numbers
    $target = "usc:".$secNums."_".$chapterNums."_".$titleNum;
    $target =~ s/,//g;

    #update the block with target info
    $newXref =~ s/crossref/$target/;
    $block =~ s/\Q$eachXref\E/$newXref/;

  } #end of chapter-first check

  #check for xref starting with the keyword title
  if($tokens[1] =~ /title/i){

    #extract title number: the number or numeral after "title"
    if($eachXref =~ /title\s+(\d+|[IVXL]+\b)/i){
      $titleNum = $1;
    }

    #find last chapter number listed
    ($lastNum,$leftOver) = split(/</,$tokens[$numTokens-1]);

    #the chapter list starts after the word "chapters"; if that word is
    #not found, fall back to scanning from the third token
    my $start = 2;
    for(my $i=0;$i<$numTokens;$i++)
      {
	if($tokens[$i] =~ /chapters/i){$start = $i+1; last;}
      }

    #extract the rest of the chapter numbers
    for(my $n=$start;$n<$numTokens;$n++)
      {
	if($tokens[$n] =~ /\d+/  && $tokens[$n] !~ /$leftOver/i)
	  {
	    $chapterNums .= "$tokens[$n];";           #build list
	  }

	if($tokens[$n] =~ /$leftOver/i)
	  {
	    $chapterNums .= ".$lastNum";              #add last number
	  }
      }

    #set target to section, chapter and title numbers
    $target = "usc:".$secNums."_".$chapterNums."_".$titleNum;
    $target =~ s/,//g;

    #update the block with target info
    $newXref =~ s/crossref/$target/;
    $block =~ s/\Q$eachXref\E/$newXref/;

  }#end of title-first check

  return;
}

############################################################################
##multiTarget3 looks for title sections combinations and extracts the 
#necessary information to creat a target for an XREF tag
#
##Pre:Requires a partially cross reference to operate on.
#Post:Completes the tagging by updating the target attributes of the xref
#
###########################################################################
sub multiTarget3{
  # Builds the target for a plural xref that names a title and several
  # sections (no chapter) and writes it into $block.
  #
  # Reads:  $eachXref, @tokens, $numTokens, $sTitleNum, $block.
  # Writes: $block - the "crossref" placeholder of this xref becomes
  #         usc:<sections>_0_<title>.
  # Takes no arguments, returns nothing.
  #
  # Fixes:
  #  - the xref text is quoted (\Q..\E) when it is looked up in $block
  #  - the title number of a title-first xref was matched with
  #    /(\d+|[IVXL]*)/, which succeeds with an empty match at the very
  #    start of the string; it is now taken from the word after "title"
  #  - in a title-first xref the section list starts after the word
  #    "sections", so the title number is no longer counted as a section
  my $secNums = "";              #list of section numbers
  my $target = "";               #string to store xref target
  my $lastNum = 0;               #last number in a reference.
  my $leftOver = "XREF>";        #whatever is after lastNum
  my $newXref = $eachXref;       #make copy of xref
  my $titleNum = $sTitleNum;     #title number, defaults to the current title
  my $chapterNum = 0;            #chapter field, always 0 here

  #nothing to do without an xref; an empty pattern in the substitutions
  #below would reuse the last successful pattern instead
  return unless defined $eachXref && length $eachXref;

  #check first to see if there is an uneven number of brackets.
  #such instances are not processed.
  my $openCount  = () = $eachXref =~ /\(/g;
  my $closeCount = () = $eachXref =~ /\)/g;
  return if $openCount != $closeCount;

  #check for xref starting with the keyword sections
  if($tokens[1] =~ /section/i){

    #extract title number: the last word, unless that word is "title"
    #(as in "of this title"), in which case the default is kept
    if($tokens[$numTokens-1] !~ /title/i)
      {
	($titleNum,$leftOver) = split(/</,$tokens[$numTokens-1]);
      }

    #collect section numbers until "title", "act" or the closing tag
    for(my $n=2;$n<$numTokens;$n++)
      {
	if($tokens[$n] =~ /\d+/  && $tokens[$n] !~ /title/i &&
	   $tokens[$n] !~ /$leftOver/i  && $tokens[$n] !~ /act/i)
	  {
	    $secNums .= "$tokens[$n];";  #build list of numbers
	  }
	if($tokens[$n] =~ /title/i || $tokens[$n] =~ /act/i || $tokens[$n] =~ /$leftOver/i)
	  {
	    last;
	  }
      }

    #set target to section, chapter and title numbers
    $target = "usc:".$secNums."_".$chapterNum."_".$titleNum;
    $target =~ s/,//g;

    #update the block with target info
    $newXref =~ s/crossref/$target/;
    $block =~ s/\Q$eachXref\E/$newXref/;

  } #end of section-first check

  #check that xref starts with the keyword title
  if($tokens[1] =~ /title/i){

    #extract title number: the number or numeral after "title"
    if($eachXref =~ /title\s+(\d+|[IVXL]+\b)/i){
      $titleNum = $1;
    }

    #find last number in list of sections
    ($lastNum,$leftOver) = split(/</,$tokens[$numTokens-1]);

    #the section list starts after the word "sections"; if that word is
    #not found, fall back to scanning from the third token
    my $start = 2;
    for(my $i=0;$i<$numTokens;$i++)
      {
	if($tokens[$i] =~ /sections/i){$start = $i+1; last;}
      }

    #extract section numbers
    for(my $n=$start;$n<$numTokens;$n++)
      {
	if($tokens[$n] =~ /\d+/  && $tokens[$n] !~ /$leftOver/i)
	  {
	    $secNums .= "$tokens[$n];";            #build list
	  }
	if($tokens[$n] =~ /$leftOver/i)
	  {
	    $secNums .= "$lastNum";               #add last number when closing tag is hit
	  }
      }

    #set target to section, chapter and title numbers
    $target = "usc:".$secNums."_".$chapterNum."_".$titleNum;
    $target =~ s/,//g;

    #update the block with target info
    $newXref =~ s/crossref/$target/;
    $block =~ s/\Q$eachXref\E/$newXref/;
  }#end of title-first check

  return;
}


############################################################################
#multiTarget4 looks for chapter/section combinations and extracts the
#information necessary to create a target.
#
#Pre: Requires a partially tagged cross reference to operate on.
#Post: Completes the tagging by updating the target attribute of the xref.
############################################################################
sub multiTarget4{
  #Builds the target for an xref that combines sections with a chapter and
  #writes the completed xref back into the block.
  #
  #Takes no arguments.  Reads the file-level $eachXref, @tokens, $numTokens,
  #$sTitleNum and $sChapterNum, and on success replaces the first occurrence
  #of $eachXref in the file-level $block with a copy whose "crossref"
  #placeholder has become "usc:<sections>_<chapter>_<title>".
  #The return value is not meaningful.
  #
  #NOTE(review): presumably @tokens is $eachXref split on whitespace, so that
  #$tokens[1] carries the keyword and the last token carries the final number
  #fused with the closing tag (e.g. "14</XREF>") - confirm against the caller.

  my $secNums = "";              #list of section numbers, ";" separated
  my $target = "";               #string to store xref target
  my $lastNum = "";              #last number in a reference
  my $leftOver = "XREF>";        #whatever is after the last number
  my $newXref = $eachXref;       #copy of the xref that receives the target
  my $titleNum = $sTitleNum;     #default title number
  my $chapterNum = $sChapterNum; #default chapter number

  #xrefs with an uneven number of brackets have never been processed by
  #this routine; keep skipping them so the set of tagged xrefs is unchanged.
  my $numOpen = ($eachXref =~ tr/(//);
  my $numClose = ($eachXref =~ tr/)//);
  return if $numOpen != $numClose;

  #check that xref starts with the keyword section
  if($tokens[1] =~ /section/i){

    #the chapter number is the last token, unless that token is the
    #keyword itself, in which case the default chapter is kept
    if($tokens[$numTokens-1] !~ /chapter/i)
      {
        my ($num,$rest) = split(/</,$tokens[$numTokens-1]);
        $chapterNum = $num if defined $num;
        #never let $leftOver become empty: an empty pattern makes perl
        #reuse the last successful regex instead of matching the tag
        $leftOver = $rest if defined $rest && length $rest;
      }

    #collect every numeric token up to the keyword chapter
    for my $n (2 .. $numTokens-1)
      {
        last if $tokens[$n] =~ /chapter/i;
        $secNums .= "$tokens[$n];" if $tokens[$n] =~ /\d+/;
      }

    #set target to title, chapter and section numbers
    $target = "usc:".$secNums."_".$chapterNum."_".$titleNum;
    $target =~ s/,//g;

    #update the block with target info.  The xref is quoted because it is
    #literal text: unquoted, "5(a)" would be read as a capture group and
    #the xref would never be found in the block.
    $newXref =~ s/crossref/$target/;
    $block =~ s/\Q$eachXref\E/$newXref/;

  } #end of section-chapter check

  #check that xref starts with the keyword chapter
  if($tokens[1] =~ /chapter/i){

    #first number in the xref is the chapter number; keep the default
    #chapter when the xref contains no number at all
    if($eachXref =~ /(\d+)/)
      {
        $chapterNum = $1;
      }

    #split the last token into the last section number and the closing tag
    my ($num,$rest) = split(/</,$tokens[$numTokens-1]);
    $lastNum = defined $num ? $num : "";
    $leftOver = $rest if defined $rest && length $rest;

    #collect section numbers
    #NOTE(review): the scan starts at token 2, which looks like it can be
    #the chapter number itself - confirm that this is intended.
    for my $n (2 .. $numTokens-1)
      {
        if($tokens[$n] =~ /\Q$leftOver\E/i)
          {
            $secNums .= $lastNum;            #closing tag hit: add last number
          }
        elsif($tokens[$n] =~ /\d+/)
          {
            $secNums .= "$tokens[$n];";      #build list
          }
      }

    #set target to title, chapter and section numbers
    $target = "usc:".$secNums."_".$chapterNum."_".$titleNum;
    $target =~ s/,//g;

    #update the block with target info (xref quoted, see above)
    $newXref =~ s/crossref/$target/;
    $block =~ s/\Q$eachXref\E/$newXref/;

  } #end of chapter-section check

  #diagnostics
  #my @diag = (4,$tokens[1],$numTokens,$titleNum,$chapterNum,$leftOver,$target,$eachXref,$newXref);
  #&diagnostics(@diag);

}

#################################################################################
#multiTarget5 checks for a list of titles and tags them.
#pre: partially tagged block of text.
#post:completely tagged block 
#################################################################################

sub multiTarget5
  {
    #Builds the target for an xref that is a list of titles and writes the
    #completed xref back into the block.
    #
    #Takes no arguments.  Reads the file-level $eachXref, @tokens and
    #$numTokens, and replaces the first occurrence of $eachXref in the
    #file-level $block with a copy whose "crossref" placeholder has become
    #"usc:0_0_<titles>", the titles being separated by ";".
    #The return value is not meaningful.

    my $secNum = 0;                #section number, always 0 for a title list
    my $target = "";               #string to store xref target
    my $leftOver = "XREF>";        #whatever is after the last number
    my $lastNum = "";              #last number in the list
    my $newXref = $eachXref;       #copy of the xref that receives the target
    my $chapterNum = 0;            #chapter number, always 0 for a title list
    my $titleNum = "";             #list of title numbers

    #split the last token into the last number and the closing tag, the
    #same way multiTarget4 does for its section lists
    if($numTokens > 0 && defined $tokens[$numTokens-1])
      {
        my ($num,$rest) = split(/</,$tokens[$numTokens-1]);
        $lastNum = $num if defined $num;
        #never let $leftOver become empty: an empty pattern makes perl
        #reuse the last successful regex instead of matching the tag
        $leftOver = $rest if defined $rest && length $rest;
      }

    #collect title numbers.  $titleNum must not be redeclared inside the
    #loop, otherwise every number lands in a throw-away variable and the
    #target ends up without any title.
    for my $n (2 .. $numTokens-1)
      {
        if($tokens[$n] =~ /\Q$leftOver\E/i)
          {
            #closing tag hit: add the last number if there is one
            $titleNum .= $lastNum if $lastNum =~ /\d/;
          }
        elsif($tokens[$n] =~ /\d+/)
          {
            $titleNum .= "$tokens[$n];";     #build list
          }
      }

    #set target to title, chapter and section numbers
    $target = "usc:".$secNum."_".$chapterNum."_".$titleNum;
    $target =~ s/,//g;

    #update the block with target info.  The xref is quoted because it is
    #literal text, not a pattern; unquoted, brackets in the xref either
    #stop it from matching or abort the program.
    $newXref =~ s/crossref/$target/;
    $block =~ s/\Q$eachXref\E/$newXref/;

  }

#############################################################################
#writeToXML uses the standardized XML writer to output the block to
#a file. The text does not go directly to XMLOut, it is first passed 
#to the FootRef module for further processing. writeToXML ensures that
#the xref tags are in the right place.

#pre: requires a block of text and an open output file.
#post: tagged block written to file.
#############################################################################


sub writeToXML{
  #Writes the file-level $block to the output: text outside xrefs is handed
  #to markupFootRefs, and every xref is written as an XREF element with a
  #TARGET attribute whose text also goes through markupFootRefs.
  #
  #Takes no arguments; the return value is not meaningful.

  #Split on the same non-greedy, case-insensitive, newline-spanning pattern
  #that collects the xrefs.  A greedy split swallows everything between the
  #first and the last xref of a block and puts the two lists out of step.
  my @nonXrefs = split(/<XREF.*?<\/XREF>/is,$block);   #all non xref text
  my @allXrefs = ($block =~ /(<XREF.*?<\/XREF>)/gis);  #all xrefs

  my $numNonXrefs = scalar(@nonXrefs);   #number of non xrefs
  my $numXrefs = scalar(@allXrefs);      #number of xrefs

  #if no xrefs available, output block to FootRef.
  if($numXrefs == 0)
    {
      my @toFootRef = ($block);   #one element array required by FootRef
      &markupFootRefs(@toFootRef);
      return;
    }

  #split drops trailing empty fields, so there can be fewer text segments
  #than xrefs (block ends with adjacent xrefs, or is nothing but an xref).
  #Walk the longer of the two lists so that no xref is left out.
  my $numSegments = $numNonXrefs > $numXrefs ? $numNonXrefs : $numXrefs;

  for(my $i=0;$i<$numSegments;$i++)
    {
      #send text before tag to FootRef
      if($i<$numNonXrefs)
        {
          my @toFootRef = ($nonXrefs[$i]);
          &markupFootRefs(@toFootRef);
        }

      next if $i>=$numXrefs;

      #find the attribute information.  The target stops at the first ">"
      #so that a ">" inside the xref text cannot end up in the attribute.
      if($allXrefs[$i] =~ /<XREF target=([^>]*)>(.*)<\/XREF>/is)
        {
          my ($target,$xrefText) = ($1,$2);

          #write start tag and add the target attribute
          &BeginTag("XREF");
          &AddAttribute("TARGET",$target);

          #send text between the tags to FootRef
          my @toFootRef = ($xrefText);
          &markupFootRefs(@toFootRef);

          #close the tag
          &EndTag();
        }
      else
        {
          #No target attribute could be read.  Write the text untagged
          #rather than reusing the captures of an earlier xref.
          #NOTE(review): confirm that dropping the tag is the preferred
          #handling for a malformed xref.
          my $plainText = $allXrefs[$i];
          $plainText =~ s/<\/?XREF[^>]*>//gi;
          my @toFootRef = ($plainText);
          &markupFootRefs(@toFootRef);
        }
    }

}



#============================================================================
#The following functions are for  testing of the Xref module.
#============================================================================  
###############################################################################
#getTestInput is used to supply input for standalone operation of the Xref module.
#It is called with a file name and returns an array of lines.
###############################################################################
sub getTestInput
  {
    #Reads a file for standalone testing of the module.
    #
    #Argument: the name of the file to read.
    #Returns:  the lines of the file as a list, line endings removed.
    #Dies with "Cannot open <file>: <reason>" if the file cannot be opened.

    my $inputFile = shift;

    #Three-argument open on a lexical handle: the name is always treated
    #as a plain file name (never as a mode or a pipe, and surrounding
    #whitespace is kept), and the handle does not leak into the package.
    open(my $in, '<', $inputFile) || die "Cannot open $inputFile: $!\n";
    my @lineText = <$in>;     #Stores the file as an array of lines
    close($in);
    chomp(@lineText);

    return @lineText;
  }
  

sub diagnostics{
  #Prints one diagnostic record to the currently selected output handle.
  #
  #Arguments, in order: case number, first keyword token, number of tokens,
  #title number, chapter number, left over text, target, original xref and
  #updated xref.  Each value is printed on its own line after its label,
  #and the record is closed by a line of "=" characters.
  my ($caseNumber, @values) = @_;

  #labels in the order in which the values are passed in
  my @labels = ("token1", "tokens", "titleNum", "chapterNum",
                "leftOver", "Target", "eachXref", "newXref");

  print "case $caseNumber\n";
  for my $position (0 .. $#labels)
    {
      print "$labels[$position]:$values[$position]\n";
    }
  print "====================\n";
}


1;
__END__;




 








