#!/usr/local/bin/perl
# Usage: perl dictionary.pl article.txt manywords.txt
#
# Updates a word-frequency dictionary with the words found in an article.
#
#   article.txt    - text whose words are counted.
#   manywords.txt  - tab-separated dictionary: a header line followed by
#                    "word<TAB>frequency" lines. It is read first and then
#                    rewritten in place, sorted by word, with the article's
#                    word counts added.

use strict;
use warnings;

# Word => frequency; shared by all the subs below.
my %dict;

# File handles: article (read), dictionary (read), dictionary (write).
my ($article_fh, $dict_in_fh, $dict_out_fh);

# Validates the command line and opens both input files for reading.
# Prints a usage message and exits with status 1 unless exactly two
# arguments were given; dies if either file cannot be opened.
sub open_files {
    if (@ARGV != 2) {
        print "Wrong number of arguments\n";
        print "Usage: perl dictionary.pl article.txt manywords.txt\n";
        exit(1);
    }

    # Three-argument open: the file name can never be parsed as a mode.
    open($article_fh, '<', $ARGV[0]) or die "Cannot open $ARGV[0]: $!\n";
    open($dict_in_fh, '<', $ARGV[1]) or die "Cannot open $ARGV[1]: $!\n";
    return;
}

# Loads the existing dictionary into %dict.
# The first line is a header and is discarded. Blank lines and lines with
# an empty word are skipped; a missing frequency is treated as 0.
sub dictionary_to_hash {
    my $header = <$dict_in_fh>;    # read (and ignore) the header line

    while (my $line = <$dict_in_fh>) {
        # Strip the line ending so the frequency does not carry a newline
        # that would be written back out as an extra blank line.
        $line =~ s/\r?\n\z//;

        my ($word, $freq) = split(/\t/, $line);
        next if !defined($word) || $word eq '';

        $dict{$word} = (defined($freq) && $freq ne '') ? $freq : 0;
    }
    return;
}

# Splits one line of text into lower-cased words.
# ASCII letters are lower-cased, the punctuation characters
# ! , . ; ' : " ( ) ? - $ are turned into spaces, and the result is split
# on whitespace. Returns the list of words (never contains empty strings).
sub line_to_words {
    my ($line) = @_;
    return () if !defined($line);

    # tr/// takes character ranges directly; brackets would be literal.
    $line =~ tr/A-Z/a-z/;
    $line =~ s/[!,.;':"()?\-\$]/ /g;

    # split ' ' skips leading whitespace, so no empty first field is
    # produced for lines that begin with a blank or punctuation.
    my @words = split(' ', $line);
    return @words;
}

# Adds each given word to %dict, incrementing its frequency or starting
# it at 1. Undefined, empty, and whitespace-only entries are ignored.
sub insert {
    my (@words) = @_;

    for my $word (@words) {
        next if !defined($word) || $word !~ /\S/;
        $dict{$word} = exists($dict{$word}) ? $dict{$word} + 1 : 1;
    }
    return;
}

# Reads the article line by line and counts every word into %dict.
sub process_new_file {
    while (my $line = <$article_fh>) {
        insert(line_to_words($line));
    }
    return;
}

# Rewrites the dictionary file (second argument) from %dict: a header line
# followed by one "word<TAB>frequency" line per word, sorted by word.
# Dies if the file cannot be opened for writing.
sub update_db {
    open($dict_out_fh, '>', $ARGV[1]) or die "Cannot open $ARGV[1]: $!\n";

    print {$dict_out_fh} "Word\tFrequency\n";
    for my $word (sort keys %dict) {
        print {$dict_out_fh} "$word\t$dict{$word}\n";
    }
    return;
}

# Closes every handle that was opened. The write handle's close is checked
# because buffered write errors only surface at close time.
sub close_files {
    close($article_fh) if defined($article_fh);
    close($dict_in_fh) if defined($dict_in_fh);
    if (defined($dict_out_fh)) {
        close($dict_out_fh) or die "Cannot close $ARGV[1]: $!\n";
    }
    return;
}

open_files();
dictionary_to_hash();
process_new_file();
update_db();
close_files();