Homework 4 Solutions

problem1.sh

 # Print the lines of log.txt in random order.
 #
 #   1. awk  - prefix every line with a random number and the field separator.
 #   2. sort - order the lines numerically on that first field only.
 #   3. awk  - blank out the random number. Assigning to $1 makes awk rebuild
 #             the record, so runs of blanks inside a line come out as single
 #             spaces.
 #   4. sed  - strip the blank(s) left at the start of each line
 #             (\+ and \t are GNU sed syntax).
 awk 'BEGIN { srand() } { print rand() FS $0 }' log.txt \
   | sort -n -k 1,1 \
   | awk '{ $1 = ""; print $0 }' \
   | sed 's/^[ \t]\+//'

Explanation

awk 'BEGIN{srand()}{print rand() FS $0}' log.txt
This prepends a random number to each line of log.txt and writes the result to standard output (the terminal, unless it is piped elsewhere). There are two main parts to this very short awk program:
- BEGIN {srand()}: This is executed only once, and it happens before awk starts processing the lines of the input file. The call to srand() seeds the random number generator with the current date and time. This causes the multiple calls to the function rand() to return a different sequence of random numbers each time the program is executed.
- {print rand() FS $0}: This block is executed once for every line of the input file. It prints a random number, followed by FS (the field separator, by default a single space), followed by the current line of the input text ($0).
sort -n -k 1,1
This sorts the output from the awk program numerically using the random numbers we produced as the sorting key (field). The syntax -k 1,1 means "sort where the key starts at the first field and ends at the first field", in effect this is equivalent to "use only the first field to sort". As each line of the input file was given a random number in the previous command, sorting these lines according to their numbers will result in a new random ordering of the file lines every time we run it.
awk '{$1="";print $0}' | sed 's/^[ \t]\+//'
This part removes the extra information we added to each line, namely the random numbers. First awk blanks out the first field ( awk '{$1="";print $0}' ), and then sed strips the one or more space or tab characters left at the beginning of the line ( sed 's/^[ \t]\+//' ).


      
      
      problem2.awk

            #!/usr/bin/awk -f
      # For every restaurant, print its name followed by the distinct people who
      # went there, in the form "restaurant: name1 name2 ...".
      #
      # Input: one visit per line, "name1,name2,...;restaurant".
      #
      # The interpreter is named directly because "#!/usr/bin/env awk -f" fails on
      # Linux: the kernel hands "awk -f" to env as a single argument.

      # Runs once before any input is read: fields are separated by a semicolon.
      BEGIN { FS = ";" }

      # Runs for every input line. $2 is the restaurant and $1 the comma-separated
      # party; append the party to the string collected so far for that restaurant.
      # Every append starts with a "," so the collected string always begins with
      # one.
      { restaurants[$2] = restaurants[$2] "," $1 }

      # Runs once after the last input line.
      END {
          for (place in restaurants) {
              printf("%s: ", place)

              # Break the collected string back into individual names.
              split(restaurants[place], names, ",")

              # Remove duplicates: array keys are unique, so using each name as a
              # key of 'customer' keeps exactly one entry per person.
              for (idx in names) {
                  customer[names[idx]] = 1
              }

              # The leading "," of the collected string produces one empty name;
              # drop it.
              delete customer[""]

              # Print every distinct customer, each followed by a space.
              for (person in customer) {
                  printf("%s ", person)
              }
              printf("\n")

              # Empty 'customer' before handling the next restaurant.
              delete customer
          }
      }
      
        
      problem3.awk
      #!/usr/bin/awk -f
      # Print every distinct dyad (unordered pair of people who appear together in
      # at least one party), one "name1 name2" pair per line.
      #
      # The interpreter is named directly because "#!/usr/bin/env awk -f" fails on
      # Linux: the kernel hands "awk -f" to env as a single argument.

      BEGIN { FS = ";" }  # Fields are separated by ';'. Executed once at the start.

      # For every input line:
      {
          n = split($1, table, ",")  # the names of this party, table[1..n]

          # Visit every pair (i, j) with i < j. For the party
          # "Bruno,Hussam,Harsh,Atheendra" that is:
          # [Bruno,Hussam], [Bruno,Harsh], [Bruno,Atheendra],
          # [Hussam,Harsh], [Hussam,Atheendra], [Harsh,Atheendra]
          for (i = 1; i <= n; i++) {
              for (j = i + 1; j <= n; j++) {
                  a = table[i]
                  b = table[j]

                  # A name listed twice in one party is not a dyad with itself.
                  if (a == b)
                      continue

                  # 'friends' records the pairs printed so far. friends[a,b] and
                  # friends[b,a] are different elements, so both orders are
                  # checked. The 'in' operator tests membership without creating
                  # the element, unlike a plain friends[a,b] reference.
                  if (!((a, b) in friends) && !((b, a) in friends)) {
                      print a " " b  # names separated by a space
                  }

                  # Remember the pair so it is not printed again.
                  friends[a, b] = 1
              }
          }
      }
      
      problem4.awk
      #!/usr/bin/awk -f
      # Print every dyad (unordered pair of people) that appears together in at
      # least 'min_meals' parties, one "name1 name2" pair per line.
      #
      # min_meals defaults to 3 and can be overridden on the command line, e.g.
      #   awk -v min_meals=5 -f problem4.awk restaurants.txt
      #
      # The interpreter is named directly because "#!/usr/bin/env awk -f" fails on
      # Linux: the kernel hands "awk -f" to env as a single argument.

      BEGIN {
          FS = ";"  # Fields are separated by ';'
          if (min_meals == "")
              min_meals = 3
      }

      # For every input line:
      {
          n = split($1, table, ",")  # the names of this party, table[1..n]

          # Visit every pair (i, j) with i < j. For the party
          # "Bruno,Hussam,Harsh,Atheendra" that is:
          # [Bruno,Hussam], [Bruno,Harsh], [Bruno,Atheendra],
          # [Hussam,Harsh], [Hussam,Atheendra], [Harsh,Atheendra]
          for (i = 1; i <= n; i++) {
              for (j = i + 1; j <= n; j++) {
                  a = table[i]
                  b = table[j]

                  # A name listed twice in one party is not a dyad with itself.
                  if (a == b)
                      continue

                  # meals["Bruno","Hussam"] and meals["Hussam","Bruno"] would be
                  # two different elements, so put the two names in a fixed
                  # (string) order first; each dyad then has exactly one counter.
                  # Appending "" forces a string comparison.
                  if ((a "") > (b "")) {
                      tmp = a
                      a = b
                      b = tmp
                  }
                  meals[a, b]++
              }
          }
      }

      # Once all lines are parsed, report the dyads that met often enough.
      END {
          for (k in meals) {
              if (meals[k] >= min_meals) {
                  # The key is "name1 SUBSEP name2"; split it back into names.
                  split(k, pair, SUBSEP)
                  print pair[1] " " pair[2]
              }
          }
      }
      

      problem5.awk
      
      #!/usr/bin/awk -f
# For every person, print "name degree", where degree is the number of distinct
# other people who appear in at least one party with that person.
#
# The interpreter is named directly because "#!/usr/bin/env awk -f" fails on
# Linux: the kernel hands "awk -f" to env as a single argument.

# Fields are separated by a semicolon.
BEGIN { FS = ";" }

# For every input line: $1 is the comma-separated party. Append the whole party
# to the string kept for each of its members, so together[name] accumulates
# everybody that person has been listed with, repetitions and the person
# included.
{
    n = split($1, table, ",")
    for (i = 1; i <= n; i++) {
        together[table[i]] = together[table[i]] "," $1
    }
}

# After the whole input has been read:
END {
    for (person in together) {
        # Turn the accumulated string into a set of names (array keys are
        # unique, so duplicates collapse).
        split(together[person], names, ",")
        for (idx in names) {
            unique[names[idx]] = 1
        }

        delete unique[person]  # the person is not their own connection
        delete unique[""]      # empty name produced by the leading comma

        # Count the remaining keys with a loop; length() applied to an array is
        # an extension that not every awk implements.
        degree = 0
        for (other in unique) {
            degree++
        }
        printf("%s %d\n", person, degree)

        delete unique          # reset the set for the next person
    }
}

      
      
      problem6.sh
      
      #!/bin/bash

# Number of dyads: problem3.awk prints one dyad per line, so count the lines.
./problem3.awk restaurants.txt | wc -l

# Sum of degrees: problem5.awk prints "name degree" per person, so add up the
# second column. "sum + 0" makes the result numeric, so 0 (not an empty line)
# is printed when problem5.awk produces no output.
./problem5.awk restaurants.txt | awk '{ sum += $2 } END { print sum + 0 }'

      
      
      problem7.awk
      
      
      #!/usr/bin/awk -f
# List who paid in each party that includes Beula, and report how many of those
# times the payer was Beula.
#
# The interpreter is named directly because "#!/usr/bin/env awk -f" fails on
# Linux: the kernel hands "awk -f" to env as a single argument.

# Fields are separated by a semicolon; $1 is the comma-separated list of people
# in the party.
BEGIN { FS = ";" }

# Only for parties whose guest list mentions Beula. The test is made against $1
# rather than the whole line so that a "Beula" appearing in another field (for
# example in the restaurant name) does not count as a visit.
$1 ~ /Beula/ {
    n = split($1, table, ",")      # names of this party, table[1..n]
    # table[n-1], the second-to-last name, is taken as the payer.
    # NOTE(review): presumably fixed by the assignment's data format - confirm.
    beula = beula " " table[n-1]   # append the payer to a space-separated list
}

# After the whole input has been read:
END {
    printf("Beula: %s\n", beula)       # the space-separated payers
    n = split(beula, payers, " ")      # one entry per recorded payer
    for (i = 1; i <= n; i++) {
        # pay[name] counts how many times 'name' was the payer.
        pay[payers[i]] += 1
    }
    # Times Beula paid, out of all recorded payments.
    printf("Beula paid %d/%d times\n", pay["Beula"], n)
}