problem1.sh

# Print the lines of log.txt in random order (decorate, sort, undecorate):
#   1. awk prefixes every line with a random integer key and a tab,
#   2. sort orders the lines numerically by that key,
#   3. cut removes the key again, leaving the rest of the line byte-for-byte
#      intact (no whitespace inside the log line is collapsed or stripped).
# srand() without an argument seeds the generator from the time of day, so
# runs started at (nearly) the same time may produce the same order.
awk 'BEGIN { srand() } { printf "%d\t%s\n", int(rand() * 1000000000), $0 }' log.txt \
  | sort -n -k 1,1 \
  | cut -f 2-

Explanation


problem2.awk

      #!/usr/bin/awk -f
      #
      # problem2.awk -- for every restaurant, list the distinct people who ate there.
      #
      # Input records have two ';'-separated fields: $1 is a comma-separated list
      # of people and $2 is the name of the restaurant they visited.
      # Output is one line per restaurant, "<restaurant>: <name> <name> ...", where
      # every name is followed by a single space.  Names appear in the order in
      # which they are first seen for that restaurant; the order of the restaurant
      # lines is whatever order awk's "for (key in array)" yields.
      #
      # The interpreter line names awk directly: on Linux everything after the
      # interpreter path is passed as ONE argument, so "/usr/bin/env awk -f" makes
      # env look for a program literally called "awk -f".

      # Runs once before any input is read: fields are separated by semicolons.
      BEGIN { FS = ";" }

      # A completely empty line describes no visit; ignore it.
      NF == 0 { next }

      # Runs for every remaining input line.
      {
          # Make sure the restaurant is listed even if the party list is empty.
          if (!($2 in guests))
              guests[$2] = ""

          party_size = split($1, party, ",")
          for (i = 1; i <= party_size; i++) {
              # Skip empty names (e.g. produced by a stray comma).
              if (party[i] == "")
                  continue

              # seen[] is keyed by (restaurant, person), so each person is
              # appended to a restaurant's guest list exactly once.
              if (($2, party[i]) in seen)
                  continue
              seen[$2, party[i]] = 1
              guests[$2] = guests[$2] party[i] " "
          }
      }

      # Runs once after all input has been processed: one line per restaurant.
      END {
          for (restaurant in guests)
              printf("%s: %s\n", restaurant, guests[restaurant])
      }
      

problem3.awk

      #!/usr/bin/awk -f
      #
      # problem3.awk -- print every dyad (unordered pair of people who shared at
      # least one meal) exactly once, as "<name> <name>".
      #
      # Input records have ';'-separated fields; $1 is a comma-separated list of
      # the people in one party.
      #
      # The interpreter line names awk directly: on Linux everything after the
      # interpreter path is passed as ONE argument, so "/usr/bin/env awk -f" makes
      # env look for a program literally called "awk -f".

      BEGIN { FS = ";" }  # Use ';' as the field delimiter.

      # Runs for every input line.
      {
          n = split($1, person, ",")  # person[1..n] are the members of this party

          # Enumerate every pair (i, j) with i < j.  For the party
          # "Bruno,Hussam,Harsh,Atheendra" that is:
          # [Bruno,Hussam], [Bruno,Harsh], [Bruno,Atheendra],
          # [Hussam,Harsh], [Hussam,Atheendra], [Harsh,Atheendra]
          for (i = 1; i <= n; i++) {
              for (j = i + 1; j <= n; j++) {
                  a = person[i]
                  b = person[j]

                  # An empty name (stray comma) or the same name listed twice
                  # does not form a dyad.
                  if (a == "" || b == "" || a == b)
                      continue

                  # seen[a, b] and seen[b, a] are different keys, so a pair
                  # counts as already printed if either ordering is recorded.
                  if (((a, b) in seen) || ((b, a) in seen))
                      continue

                  seen[a, b] = 1
                  # Separate the two names with a space so the pair is readable.
                  print a " " b
              }
          }
      }

problem4.awk

      #!/usr/bin/awk -f
      #
      # problem4.awk -- print every pair of people who ate together at least
      # min_meetings times (default 3), one pair per line as "<name> <name>".
      #
      # Input records have ';'-separated fields; $1 is a comma-separated list of
      # the people in one party.
      # The threshold can be overridden on the command line, e.g.
      #     awk -v min_meetings=2 -f problem4.awk restaurants.txt
      #
      # The interpreter line names awk directly: on Linux everything after the
      # interpreter path is passed as ONE argument, so "/usr/bin/env awk -f" makes
      # env look for a program literally called "awk -f".

      BEGIN {
          FS = ";"                      # Use ';' as the field delimiter.
          if (min_meetings == "")       # not supplied with -v: keep the default
              min_meetings = 3
      }

      # Runs for every input line.
      {
          n = split($1, person, ",")  # person[1..n] are the members of this party

          # Enumerate every pair (i, j) with i < j.  For the party
          # "Bruno,Hussam,Harsh,Atheendra" that is:
          # [Bruno,Hussam], [Bruno,Harsh], [Bruno,Atheendra],
          # [Hussam,Harsh], [Hussam,Atheendra], [Harsh,Atheendra]
          for (i = 1; i <= n; i++) {
              for (j = i + 1; j <= n; j++) {
                  a = person[i]
                  b = person[j]

                  # An empty name (stray comma) or the same name listed twice
                  # does not form a pair.
                  if (a == "" || b == "" || a == b)
                      continue

                  # meetings[a, b] and meetings[b, a] are different keys.  Keep
                  # one counter per pair by reusing whichever ordering was
                  # recorded first.
                  if ((b, a) in meetings)
                      key = b SUBSEP a
                  else
                      key = a SUBSEP b
                  meetings[key]++
              }
          }
      }

      # Runs once after all input has been processed.
      END {
          for (key in meetings) {
              if (meetings[key] >= min_meetings) {
                  # key is "<name> SUBSEP <name>"; split it back into the names.
                  split(key, pair, SUBSEP)
                  print pair[1] " " pair[2]
              }
          }
      }

problem5.awk

#!/usr/bin/awk -f
#
# problem5.awk -- print, for every person, the number of distinct other people
# they have shared a meal with, one line per person as "<name> <count>".
#
# Input records have ';'-separated fields; $1 is a comma-separated list of the
# people in one party.  The order of the output lines is whatever order awk's
# "for (key in array)" yields.
#
# The interpreter line names awk directly: on Linux everything after the
# interpreter path is passed as ONE argument, so "/usr/bin/env awk -f" makes
# env look for a program literally called "awk -f".

# Set the field separator to a semicolon.
BEGIN { FS = ";" }

# Runs for every input line.
{
    n = split($1, diner, ",")       # diner[1..n] are the members of this party
    for (i = 1; i <= n; i++) {
        # Skip empty names (e.g. produced by a stray comma).
        if (diner[i] == "")
            continue

        # Make sure the person is reported even with no companions at all.
        degree[diner[i]] += 0

        for (j = 1; j <= n; j++) {
            # Nobody is their own companion, and empty names are not people.
            if (diner[j] == "" || diner[j] == diner[i])
                continue

            # met[] is keyed by (person, companion); count each companion the
            # first time the combination shows up, and only then.
            if (!((diner[i], diner[j]) in met)) {
                met[diner[i], diner[j]] = 1
                degree[diner[i]]++
            }
        }
    }
}

# After the whole input has been parsed: one "<name> <count>" line per person.
END {
    for (who in degree)
        printf("%s %d\n", who, degree[who])
}

problem6.sh

#!/bin/bash
#
# problem6.sh -- print two numbers for comparison:
#   1. the number of dyads, i.e. the number of lines problem3.awk prints;
#   2. the sum of the second output column of problem5.awk (the degree of
#      each person).
#
# Usage: problem6.sh [input-file]     (input-file defaults to restaurants.txt)
#
# Both awk programs are looked up in the current directory.  They are run
# through "awk -f" so that they need neither the executable bit nor a
# working interpreter line.

# Stop on the first failing command or pipeline stage instead of printing
# numbers computed from partial output.
set -euo pipefail

input=${1:-restaurants.txt}

# Run problem3.awk and count its output lines to get the number of dyads.
awk -f ./problem3.awk "$input" | wc -l

# Run problem5.awk and sum the second column.  "sum + 0" forces a numeric
# result, so empty output yields 0 rather than an empty line.
awk -f ./problem5.awk "$input" | awk '{ sum += $2 } END { print sum + 0 }'

problem7.awk


      
#!/usr/bin/awk -f
#
# problem7.awk -- report who paid on every visit that Beula took part in, and
# how many of those visits Beula paid for herself.
#
# Input records have ';'-separated fields; $1 is a comma-separated list of the
# people in one party.  The payer of a party is taken to be the second-to-last
# name of that list.
# NOTE(review): that position is kept from the original script -- confirm it
# against the problem statement.
#
# Output:
#   Beula: <space-separated payers, each preceded by a space>
#   Beula paid <times Beula paid>/<visits with Beula> times
#
# The interpreter line names awk directly: on Linux everything after the
# interpreter path is passed as ONE argument, so "/usr/bin/env awk -f" makes
# env look for a program literally called "awk -f".

# Set the field separator to a semicolon.
BEGIN { FS = ";" }

# Return s without leading and trailing blanks.
function trim(s) {
    gsub(/^[ \t]+|[ \t]+$/, "", s)
    return s
}

# Runs for every input line.
{
    n = split($1, table, ",")       # table[1..n] are the members of this party

    # Beula took part only if she is one of the listed names.  Matching the
    # whole line with a regular expression would also accept a restaurant
    # name or a longer name that merely contains "Beula".
    present = 0
    for (i = 1; i <= n; i++) {
        table[i] = trim(table[i])
        if (table[i] == "Beula")
            present = 1
    }

    # Skip parties without Beula and parties too small to have a
    # second-to-last member.
    if (!present || n < 2)
        next

    payer = table[n - 1]
    if (payer == "")
        next

    payers = payers " " payer       # payer list for the report, space-separated
    visits++                        # one more visit with Beula present
    if (payer == "Beula")
        paid++                      # ... which Beula paid for herself
}

# After the whole input has been parsed.
END {
    printf("Beula: %s\n", payers)                        # the payers, in input order
    printf("Beula paid %d/%d times\n", paid, visits)     # Beula's share
}