## Plot some distribution distance functions
## Lillian Lee
##
## Renders several distance/divergence measures between a variable
## two-outcome distribution q and a fixed two-outcome distribution r,
## as functions of q(v_1), into a PNG file.

set term pngcairo dashed
set output "distances-between-distributions.png"
# Actually, "../www/images/distances-between-distributions.png"

set xtics 0,.1
set xrange [0:1]
set yrange [0:3]
set xlabel 'q(v_1)'
set title "Distances between distributions q and fixed r, where r(v_1) = r(v_2) = .5\n(Variation on Figure 2.1 of Lee 1997 PhD thesis)"
set samples 500 # needed to get function points closer to x=0

## Assume two-item vocabulary {v_1, v_2}

## Make things a little more readable, in the sense of not having
## to read "x" and mentally convert to q(v_1).
q1(x) = x
q2(x) = 1 - q1(x)
# Euclidean length of the vector (q(v_1), q(v_2)); used by the cosine.
lenq(x) = sqrt(q1(x)**2 + q2(x)**2)

## r represents the fixed categorical distribution we're going to compare to:
# r(v_1) = .5 (so r(v_2) = .5)
r1 = .5
r2 = 1 - r1
lenr = sqrt(r1**2 + r2**2)

# L_1 distance: sum of absolute differences of the two probabilities.
L1(x) = abs(q1(x) - r1) + abs(q2(x) - r2)

# Cosine of the angle between q and r viewed as vectors.
cos1(x) = (q1(x)*r1 + q2(x)*r2) / (lenr*lenq(x))

# Base-2 logarithm (gnuplot's log() is the natural logarithm).
log2(x) = log(x)/log(2)

# One term p*log2(p/d) of a KL-divergence sum, using the standard
# convention 0*log(0/d) = 0.  Without the guard the term is undefined at
# p = 0 and gnuplot drops the sample, so curves lose their endpoints at
# q(v_1) = 0 and q(v_1) = 1.  The ternary only evaluates the branch taken.
# When p > 0 and d = 0 the term is left undefined on purpose: the
# divergence is infinite there and the point cannot be drawn.
klterm(p, d) = (p > 0) ? p*log2(p/d) : 0

# KL divergences D(q||r) and D(r||q).
Dq(x) = klterm(q1(x), r1) + klterm(q2(x), r2)
Dr(x) = klterm(r1, q1(x)) + klterm(r2, q2(x))

# Only referenced by the commented-out plot entry at the end of the file.
Hr(x) = r1*log2(1/q1(x)) + r2*log2(1/q2(x))

# Jensen-Shannon divergence: mean of the KL divergences of q and of r
# from their average distribution.
avgqr1(x) = .5*(r1 + q1(x))
avgqr2(x) = .5*(r2 + q2(x))
Dqavg(x) = klterm(q1(x), avgqr1(x)) + klterm(q2(x), avgqr2(x))
Dravg(x) = klterm(r1, avgqr1(x)) + klterm(r2, avgqr2(x))
JS(x) = .5*(Dqavg(x) + Dravg(x))

plot \
  Dr(x) title "D(r||q)" lt 1, \
  Dq(x) title "D(q||r)" lt 1 dt 2, \
  L1(x) title "L_1(q,r)", \
  1-cos1(x) title "1 - cos(q,r)", \
  JS(x) title "JS(q,r)" lt 8

# Disabled curve; to enable it, add it as another entry in the plot list above.
# (No trailing backslash here: a comment ending in a backslash would swallow
# the following line as well.)
#   Hr(x) title "H(r||q)" lt 1 dt 5