#!perl
############################################################################################################
#
# This is a demo perl program for CS430. Given a directory, it reads every plain file in that directory
# and performs two tasks on each one:
#   TASK 1: convert the input file to upper case and write the result to an output file with an
#           "L2U_" prefix
#   TASK 2: reverse the character order of each word in the input file and write the result to an
#           output file with a "REV_" prefix
# The output files are created in the current working directory.
#
# Authors: Manpreet Singh (manpreet@cs.cornell.edu), Siddharth Anand (sanand@cs.cornell.edu)
# Date: 9/01/01
#
############################################################################################################

# The first line of all Perl programs on MS Win systems should be "#!perl". On Unix, the Perl interpreter
# is usually at /usr/local/bin/perl and hence the first line should be "#!/usr/local/bin/perl" for Unix.

# "use strict" forces every variable to be declared (with "my") before it is used, and "use warnings"
# reports common mistakes such as reading an undefined value. Every Perl program should start with both.
use strict;
use warnings;

# Core modules: basename() strips the directory part of a path, and File::Spec->catfile() joins a
# directory name and a file name using the separator of the current operating system.
use File::Basename qw(basename);
use File::Spec;

# Run the program only when this file is executed directly. When the file is loaded by another program
# (for example a test script that wants to call rev_word), caller() returns a true value and the main
# routine is skipped.
main(@ARGV) unless caller;

# main(@args)
#
# Entry point. Expects exactly one argument, the path of the directory to process. Dies with a usage
# message when the argument count is wrong and with an error message when the directory cannot be opened.
sub main {
    my @args = @_;

    # There should be exactly one argument to this perl program. The arguments are passed to the program
    # via an array known as @ARGV (handed to this routine as @args). An array used in a numeric
    # comparison yields its number of elements; the index of its last element is available as $#args.
    # Therefore, if there is one argument, $#args equals 0 and the length of the array is 1.
    if ( @args != 1 ) {
        die "usage: perl assignment0.pl rel_dir_path\n";
    }
    my $dir = $args[0];

    # The special $0 variable stores the name used to start this program, which may include a directory
    # part (e.g. "./assignment0.pl"). readdir() returns bare file names, so we keep only the base name.
    my $program_name = basename($0);

    # To be able to read from the directory, for example to review the list of files that are in the
    # directory, we need to create a directory handle. If the directory cannot be opened, the program
    # exits and prints an error message.
    opendir( my $dir_handle, $dir ) or die "Dir doesn't exist\n";

    # In list context readdir() returns all remaining directory entries at once. These are strings such
    # as ".", "..", and file names. We take the whole listing BEFORE creating any output file, so that
    # files generated by this run are never mistaken for input. grep keeps only the entries for which
    # the block is true: we skip ".", "..", this program itself, and anything that is not a plain file
    # (the -f test), such as a sub-directory.
    # NOTE: output files left over from a previous run in the same directory are ordinary files and
    # will be processed like any other input.
    my @input_files = grep {
               $_ ne '.'
            && $_ ne '..'
            && $_ ne $program_name
            && -f File::Spec->catfile( $dir, $_ )
    } readdir($dir_handle);

    # It is good practice to close the directory and file handles rather than expecting the OS to.
    closedir($dir_handle);

    # Aside: what is the difference between double and single quotes? Inside double quotes, variables
    # such as scalars ($a_scalar) and arrays (@an_array) are replaced by their values, and escapes such
    # as \n are turned into the characters they stand for. For example, the following set of commands
    # prints "hello" followed by a newline:
    #
    #     $greeting = "hello\n";
    #     print "$greeting";
    #
    # Inside single quotes nothing is substituted. So, the following set of commands prints the
    # literal text $greeting to the screen:
    #
    #     $greeting = "hello\n";
    #     print '$greeting';

    # TASK 1: convert each file to upper case. The output name is formed with the binary '.'
    # (concatenation) operator. \&lower_to_upper is a reference to the subroutine, which lets
    # transform_file call it once per line.
    foreach my $file (@input_files) {
        transform_file( File::Spec->catfile( $dir, $file ), 'L2U_' . $file, \&lower_to_upper );
    }

    # TASK 2: go through the same files again and reverse the order of the letters in each word.
    # The word order within the files is preserved.
    foreach my $file (@input_files) {
        transform_file( File::Spec->catfile( $dir, $file ), 'REV_' . $file, \&rev_word );
    }

    return;
}

# transform_file($in_path, $out_path, $transform)
#
# Reads $in_path line by line, passes every line to the subroutine reference $transform, and writes
# whatever it returns to $out_path (which is created or overwritten). Returns 1 on success. When a file
# cannot be opened, a warning is printed and nothing is returned, so the caller can continue with the
# next file.
sub transform_file {
    my ( $in_path, $out_path, $transform ) = @_;

    # Open an input file handle. The three-argument form of open keeps the mode ('<' for reading)
    # separate from the file name, so unusual file names cannot be misread as a mode. The $! variable
    # holds the operating system's reason for the most recent failure.
    my $input;
    if ( !open( $input, '<', $in_path ) ) {
        warn "Cannot read '$in_path': $!\n";
        return;
    }

    # Open an output file handle ('>' creates the file or truncates an existing one).
    my $output;
    if ( !open( $output, '>', $out_path ) ) {
        warn "Cannot write '$out_path': $!\n";
        close($input);
        return;
    }

    # <$input> reads one line from the file each time it is evaluated. A line is a series of characters
    # terminated by a new-line character "\n" (the terminator is kept in $line). The loop ends when the
    # end of the file is reached.
    while ( my $line = <$input> ) {
        print {$output} $transform->($line);
    }

    close($input);

    # Buffered write errors (e.g. a full disk) only surface when the handle is closed.
    close($output) or warn "Cannot finish writing '$out_path': $!\n";

    return 1;
}

# The subroutine definitions for the two text transformations follow. Notice that there are no formal
# parameter definitions. Any number of parameters can be passed by the caller. The routine gets these
# parameters in the special array @_. Therefore, the first parameter will be in the first element of
# the @_ array, or $_[0].

# rev_word($inputline)
#
# Returns a copy of $inputline in which the characters of every word (a run of non-whitespace
# characters) are reversed. Word order and all whitespace, including the trailing newline, are left
# exactly as they were.
sub rev_word {

    # "my" declares a variable that is private to the enclosing block, like an automatic variable in
    # C/C++. The elements of @_ are aliases for the caller's variables, so modifying $_[0] directly
    # would change the caller's string. Copying the parameter into a "my" variable avoids that.
    my ($inputline) = @_;

    # The substitution operator s/PATTERN/REPLACEMENT/ finds text matching the pattern and replaces it.
    # \S+ matches one or more non-whitespace characters, i.e. one word, and the parentheses capture the
    # word in $1. The /g modifier repeats the substitution for every word in the line, and /e means the
    # replacement is Perl code to evaluate: "scalar reverse" reverses the characters of a string.
    $inputline =~ s/(\S+)/scalar reverse $1/ge;

    # A subroutine returns the value given to "return" (without one, it returns the value of the last
    # expression it evaluated).
    return $inputline;
}

# lower_to_upper($inputline)
#
# This is a simpler subroutine. Returns a copy of $inputline with the letters a-z converted to A-Z.
# All other characters are unchanged.
sub lower_to_upper {

    # Copy the input parameter to a private variable so the caller's string is not altered.
    my ($inputline) = @_;

    # The translate operator replaces each character of $inputline that appears in the first list with
    # the character at the same position in the second list. The term "a-z" means "any lowercase
    # letter"; the term "A-Z" means "any uppercase letter".
    $inputline =~ tr/a-z/A-Z/;

    return $inputline;
}