#include #include #include #include #define MAX_ITEMS 200000 #define MAX_BINS 1000 #define debug 0 #define eps 1.0e-99 /* modified 11.08.03 to add probability calibration mode modified 06.02.03 to add loss mode (converts all measures to losses ranging from 0 to 1) modified 05.22.03 to add all -- perf measure that is W1*ACC + W2*ROCA - W3*RMS modified 05.13.03 to add SLAC's Q-score Modified 05.12.03 by Filip Added stderr warning when a prediction is negative but threshold is 0.5 Modified 05.01.03 by Filip Added usage and -files option modified 04.16.03 to add norm and add -percent option modified 04.16.03 to add average precision and cross entropy and to fix break even point modified 02.02.03 to add rms modified 01.17.02 to add precision/recall, break-even, F, lift, confusion table, cost matrix modified 02.02.00 to fix auto_threshold bug!!! and take negative of ACE modified 11.12.99 to compile on unix, try "cc -o perf2 perf2.c -lm" This program accepts a number of options: -stats : print stats (ACC, RMS, PPV, NPV, SEN, SPC, PRE, REC, LFT) -easy : print only ACC, RMS, ROC -norm n : computes the n norm of the error -threshold 0.83 : use value 0.83 as threshold for stats -percent prc : predict prc% of the dataset to be 1's (usefull for lift) -costs ca cb cc cd : apply costs to confusion matrix and print total cost -rocplot : print ROC curve -accplot : plot accuracy vs. threshold -prplot : precision/recall plot -liftplot : plot lift curve -costplot : plot cost vs. threshold -noroc : don't print ROC area -confusion : print confusion matrix -ace : include ACE calibration score -breyce : include breyce calibration score -monti : only output accuracies, roc area, ace, and breyce -slacq binwidth seed n : output the mean SLAC Q-score after n trials of splitting into two test sets -all wacc wroc wrms : wacc*ACC + wroc*ROC - wrms*RMS (use positive weight for RMS) -loss : all measures displayed as losses on [0,1] -cal n : mean absolute probability calibration error for bins of size n points If no option is specified, only the ROC area is printed. Most options may be combined (plots such as ROC plot and acc vs. thresh plots disable each other). Options may be specified in any order. Abbreviations are available for most options. Input to program is a sequence of lines, one line per case with format: "TRUE_VALUE whitespace PRED_VALUE" The file roc.test.data contains sample test data. The file roc.test.results contains the results of running: perf -option < roc.test.data > roc.test.results TRUE_VALUE and PRED_VALUE can be any numbers, positive or negative. The program computes the mean TRUE_VALUE and mean PRED_VALUE. If TRUE_VALUE <= MEAN_TRUE_VALUE, the truth for the case is considered to be a 0, otherwise it is considered a 1. This lets you use any coding for the TRUE_VALUE's as long as the value used to represent 0's is less than the value used to represent 1's. So you can code things as 0/1, -1/+1, etc. Note that the ACE and Breyce calibration scores both assume the input data uses 0=false, 1=true, with the prediction probabilities ranging between 0 and 1. The program computes the ROC curve, ROC area, accuracy (ACC), Positive Predictive Value (PPV), Negative Predictive Value (NPC), Sensitivity (SEN), Specificity(SPC), Precision/Recall curve, F, Precision/Recall Break Even point, and Lift. Computing things like Accuracy requires a prediction threshold for the PRED_VALUE: (PRED_VALUE < thresh) is interpreted as a prediction of 0. (PRED_VALUE >= thresh) is interpreted as a prediction of 1. The program takes an optional argument that lets you specify the prediction threshold. If you don't specify a value, the program assumes 0.5, which is the right thing to do if the PRED_VALUES are properly calibrated probabilities. The program also computes two different prediction threshold values itself. The first is done by finding the threshold that would make the number of predicted 1's match the number of true 1's in the data set. If 15 of 100 points given to the program are true 1's, it sorts the points by PRED_VALUE (least first) and computes a threshold from the average PRED_VALUE of points 84 and 85 in the sort. This prediction threshold makes 15 of the 100 points be predicted 1. (If two or more points have the same PRED_VALUE and these to sort to the place where the threshold finder picks values, the number of points predicted to be 1 can't match the number of true 1's.) The second computed threshold is the prediction threshold that maximizes the Accuracy. This threshold is found by trying all thresholds that fall half way between adjacent prediction values in the data set, and reporting the one that yields the highest accuracy. It is not uncommon for more than one threshold to yield the same maximum accuracy. In this case, the program reports the lowest threshold that achieved maximum accuracy, and prints a caution that more than one prediction threshold achives this same accuracy. When this happens, you can rerun the program with the -accplot option to see the accuracy for each threshold. (In Unix "perf -accplot -noroc < infile | sort -n +1 | tail" will find just those thresholds that yielded highest accuracy.) The program computes the error measures listed above for the 0.5 threshold (or user supplied threshold) and for the two automatically computed thresholds. (Neither affects the ROC curve or its area.) Be careful using computed thresholds and their associated stats. It is probably improper to use the threshold computed on a test set to estimate statistics for that test set. The threshold probably should be estimated with the training set, and then given to the program to calculate test set stats. That's why the threshold can be specified as an optional parameter. The ROC curve is a plot of SENSITIVITY vs. 1-SPECIFICITY as the prediction threshold sweeps from 0 to 1. A typical ROC curve for reasonably accurate predictions looks like this: | * S | * E | * N | * S | I | * T | I | * V | I | * T | Y |* - - - - - - - - - - - - - - - - - - - 1 - SPECIFICITY If there is no relationship between the prediction and truth, thethe ROC curve is a diagonal line with area 0.5. If the prediction strongly predicts truth, the curve rises quickly and has area near 1.0. If the prediction strongly predicts anti-truth, the ROC area is less than 0.5. Here's a definition of SPECIFICITY, SENSITIVITY, and the other error measures this program computes such as PRECISION, RECALL, LIFT, ... (Some of this is from Constantin's email.) Here's a confusion matrix for a two class problem: MODEL PREDICTION | 1 0 | - - - + - - - - - - - - - - - + - - - - - TRUE 1 | A B | A+B OUTCOME | | 0 | C D | C+D - - - + - - - - - - - - - - - + - - - - - | A+C B+D | A+B+C+D 1 = POSITIVE 0 = NEGATIVE ACC = (A+D) /(A+B+C+D) PPV = A / (A+C) NPV = D / (B+D) SEN = A / (A+B) SPE = D / (C+D) PRE = A / (A+C) (PRECISION = PPV) REC = A / (A+B) (RECALL = SENSITIVITY) A Precision/Recall Curve is a plot of Precision on the vertical axis vs. Recall on the horizontal axis as the threshold sweeps through the data. A Precision/Recall plot attempts to measure similar behavior as an ROC plot. But ROC and PR plots are not the same. The vertical axis on an ROC plot is the same as the horizontal axis on a PR plot, but the other axes differ. PR plots tend to be used by the information retreival communities, wheras ROC curves tend to be used by the medical, signal processing, statistics, and machine learning communities. ROC plots have some properties that PR plots do not have: ROCs are monotonic, always touch both axes, and have well characterized statistical properties. PR plots need not be monotonic (but often are), may not touch the Precision axes (when the cases with the highest predicted values are class negative), and has less statistical semantics. In practice, however, both provide qualitatively similar views of performance. Here is a sample Precision/Recall Curve: | P | * R | * * E | * C | * C | * I | * S | * I | * O | * N | * | * - - - - - - - - - - - - - - - - - - - RECALL LIFT and LIFT CURVES measure similar quantities as ROC and and Precision/Recall. Lift is a measure of the percent of the positive class that is in the returned objects. For example, if a dataset contains 1000 objects, 200 of which are the target class (positives), and the threshold is set so that 30% of the dataset is returned, and that 30% contains 90% of the positives, then LIFT = 0.90/0.30 = 3. LIFT greater than 1 means the retreived objects (the objects greater than threshold) have more targets in them than a random sample would have. LIFT less than 1 is bad. LIFT = 1 is what is expected from random (uninformed) sampling. Obviously we want LIFT as large as possible. The largest LIFT occurs if 100% of the target class is returned without any of the negatives being included, perfect prediction. If the target class is p% of the dataset, and all targets are returned in a sample also of size p%, LIFT = 100%/p%. For the example above where 20% of the data is targets, the maximum lift is 100%/20% = 5. LIFT sometimes is plotted as a function of the percent of objects returned. This is the LIFT plot. LIFT often is presented in a table of LIFTS measured when some small but interesting percent of the objects are returned (e.g., 5%, 10%, 15%, 20%, 25%, ...). LIFT is a good measure to use in addition to accuracy. Suppose the accuracy of a model is 90%. Is this good performance? Maybe. Maybe not. If it is possible to acheive 90% accuracy by just predicting the most common class, then a model with 90% accuracy is nothing to write home about. LIFT is less sensitive to the base prediction rate than accuracy. For example, if 90% of the data is 0's, 10% 1's, the base accuracy is 90% if the model predicts 0 for everything, so 90% accuracy is not really very good. If a threshold is used that causes 10% of the cases to be predicted 1, and if 80% of the cases predicted to be 1 actually are 1, the accuracy is 0.80*0.1 + Lift tends to be used in marketing and data mining more than in machine learning and information retreival because it is easy to understand, and because it directly measures how effective the model is at detecting targets with high reliability. For example, if you are going to send marketing flyers to only 10% of your customers, you want that 10% to include as large a fraction of targets (responders) as possible. WARNING!: This code has not been thoroughly tested. If you find an error, please email me: caruana@cs.cornell.edu */ float true[MAX_ITEMS]; float pred[MAX_ITEMS]; double mean_true, mean_pred; double pred_thresh; double sse, rmse, cxe, loge,norm,norm_err; double a, b, c, d; double freq_thresh, threshold; double max_acc, max_acc_thresh, last_acc_thresh, acc, acc_plot; double min_cost, min_cost_thresh, last_cost_thresh, cost, cost_plot; double cost_a, cost_b, cost_c, cost_d; int costs, min_cost_count; double freq_a, freq_b, freq_c, freq_d; double max_acc_a, max_acc_b, max_acc_c, max_acc_d; int max_acc_count; int arg, taken, area, break_even, roc_plot, pr_plot, lift_plot, stats, easy, thresh, confusion, nrm, prcdata; int ace, breyce, monti; int slacq_flag, slacq_seed, slacq_trials; double slacq_window, slacq_bin_width, slac_w, slac_q, last_p, n_cor, n_tot; double slac_q_sum, slac_q_sumsquares, slac_q_mean, slac_q_sdev; int slacq_tb[2][MAX_BINS], slacq_tbb[2][MAX_BINS], bin, no_bins, set, slac_qn, slacq_trials, trial; int no_item, item; int tt, tf, ft, ff; int total_true_0, total_true_1; double sens, spec, tpf, fpf, tpf_prev, fpf_prev, roc_area; double ace_sum, breyce_sum; double precision, recall, precision_prev, recall_prev, pr_break_even; double lift, frac_above_threshold; double prcones, apr; int lasttt, lasttf, lastft, lastff, cnt, onecnt, i; int data_split, no_remaining; double data_prc; int all; double allwacc, allwroc, allwrms; int loss; int cal_flag, cal_bin_size, cal_f, cal_t, cal_sum_n; double sum_p, cal_sum, cal_sumsquares, obs_p, cal_err, cal_mean_err, plow, phigh; double max_lift, min_lift; /* Print the usage */ void print_usage(char *s) { printf("Usage:\n\t %s [options] < input > output\n", s); printf("\tOR %s [options] -files > output\n\n", s); printf("Allowed options:\n"); printf("\n\t INDIVIDUAL MEASURES\n"); printf("\t-a Compute ROC area (ROC) [default]\n"); printf("\t-noa Don't compute ROC Area\n"); printf("\t-break Compute Precision/Recall Break Even Point (PRB/PRF)\n"); printf("\t-ace Include ACE calibration score (ACE)\n"); printf("\t-breyce Include Breyce calibation score (BRE)\n"); printf("\t-norm Compute the norm error using L metric (NRM)\n"); printf("\t-cost \n"); printf("\t Compute total cost using these cost values\n"); printf("\t-c Print confusion table and many stats\n"); printf("\n\t (not available individually)\n"); printf("\t ACC Accuracy\n"); printf("\t APR Mean Average Precision\n"); printf("\t CXE (UNKNOWN)\n"); printf("\t NPV (UNKNOWN)\n"); printf("\t PRE Precision\n"); printf("\t REC Recall\n"); printf("\t RMS Root Mean Squared Error\n"); printf("\t SEN (UNKNOWN)\n"); printf("\t SPC (UNKNOWN)\n"); printf("\n\t COMMON GROUPS\n"); printf("\t-easy Compute ROC, ACC and RMS\n"); printf("\t-monti Compute ROC, ACC, ACE and BRE\n"); printf("\t-s Compute all stats and confusion matrix\n"); printf("\n\t PLOTS (Only one plot is drawn at a time)\n"); printf("\t-p Draw ROC plot\n"); printf("\t-pr Draw Precision/Recall plot\n"); printf("\t-lift Draw Lift versus threshold plot\n"); printf("\t-costplot Draw Cost versus threshold plot\n"); printf("\t-accuracy Draw Accuracy versus threshold plot\n"); printf("\n\t PARAMETERS\n"); printf("\t-t Set threshold [default: 0.5]\n"); printf("\t-percent Set threshold so percent of data is positive\n"); printf("\n\t INPUT\n"); printf("\t-files Read input from files\n"); printf("\n"); } /* compute the accuracy using the threshold */ double accuracy (double threshold) { int a,b,c,d,item; a = 0; b = 0; c = 0; d = 0; for (item=0; item= threshold ) a++; else b++; } else /* true outcome = 0 */ { if ( pred[item] >= threshold ) c++; else d++; } return( ((double)(a+d)) / (((double)(a+b+c+d)) + eps) ); } void print_confusion_table (a,b,c,d,thresh) double a,b,c,d; double thresh; { printf ("\n"); printf (" Predicted_1 Predicted_0 Threshold %9.6lf\n", thresh); printf ("True_1 %6.4lf %6.4lf\n", a, b); printf ("True_0 %6.4lf %6.4lf\n", c, d); } /* partition is used by quicksort */ int partition (p,r) int p,r; { int i, j; float x, tempf; x = pred[p]; i = p - 1; j = r + 1; while (1) { do j--; while (!(pred[j] <= x)); do i++; while (!(pred[i] >= x)); if (i < j) { tempf = pred[i]; pred[i] = pred[j]; pred[j] = tempf; tempf = true[i]; true[i] = true[j]; true[j] = tempf; } else return(j); } } /* vanilla quicksort */ void quicksort (p,r) int p,r; { int q; if (p < r) { q = partition (p,r); quicksort (p,q); quicksort (q+1,r); } } int main (argc, argv) int argc; char *argv[]; { /* Used for -files option */ char *infile=NULL, *outfile=NULL; FILE *f_in=NULL, *f_out=NULL; int warned_user=0; area = 1; break_even = 0; roc_plot = 0; pr_plot = 0; lift_plot = 0; stats = 0; thresh = 0; acc_plot = 0; pred_thresh = 0.5; ace = 0; breyce = 0; monti = 0; confusion = 0; costs = 0; easy = 0; nrm = 0; prcdata = 0; slacq_flag = 0; all = 0; loss = 0; cal_flag = 0; arg = 1; while ( arg < argc ) { taken = 0; if (!strcmp(argv[arg], "-a") || !strcmp(argv[arg], "-area") || !strcmp(argv[arg], "-rocarea") || !strcmp(argv[arg], "-roc_area") || !strcmp(argv[arg], "-ROC_area") || !strcmp(argv[arg], "-roc") || !strcmp(argv[arg], "-ROC")) { area = 1; taken = 1; } if (!strcmp(argv[arg], "-noa") || !strcmp(argv[arg], "-noroc") || !strcmp(argv[arg], "-noROC") || !strcmp(argv[arg], "-noarea")) { area = 0; taken = 1; } if (!strcmp(argv[arg], "-easy")) { easy = 1; area = 1; taken = 1; } if (!strcmp(argv[arg], "-norm")) { nrm = 1; arg++; norm = atof(argv[arg]); taken = 1; } if (!strcmp(argv[arg], "-percent")) { prcdata = 1; arg++; data_prc = atof(argv[arg]); if (data_prc<0||data_prc>100) { fprintf(stderr,"The argument for -percent must be between 0 and 100.\n");fflush(stderr); exit(-1); } taken = 1; } if (!strcmp(argv[arg], "-break_even") || !strcmp(argv[arg], "-break") || !strcmp(argv[arg], "-break_even_point") || !strcmp(argv[arg], "-breakeven") || !strcmp(argv[arg], "-breakevenpoint") || !strcmp(argv[arg], "-breakevenpt") || !strcmp(argv[arg], "-break_even_point") || !strcmp(argv[arg], "-pr_break_even") || !strcmp(argv[arg], "-pr_break") || !strcmp(argv[arg], "-pr_break_even_point") || !strcmp(argv[arg], "-prbreakeven") || !strcmp(argv[arg], "-prbreakevenpoint") || !strcmp(argv[arg], "-prbreakevenpt") || !strcmp(argv[arg], "-prbreak_even_point")) { break_even = 1; taken = 1; } if (!strcmp(argv[arg], "-p") || !strcmp(argv[arg], "-rocplot") || !strcmp(argv[arg], "-ROCplot") || !strcmp(argv[arg], "-roc_plot") || !strcmp(argv[arg], "-ROC_plot") || !strcmp(argv[arg], "-plot")) { roc_plot = 1; pr_plot = 0; acc_plot = 0; lift_plot = 0; cost_plot = 0; taken = 1; } if (!strcmp(argv[arg], "-pr") || !strcmp(argv[arg], "-prplot") || !strcmp(argv[arg], "-PRplot") || !strcmp(argv[arg], "-pr_plot") || !strcmp(argv[arg], "-PR_plot") || !strcmp(argv[arg], "-plotpr")) { pr_plot = 1; roc_plot = 0; acc_plot = 0; lift_plot = 0; cost_plot = 0; taken = 1; } if (!strcmp(argv[arg], "-lift") || !strcmp(argv[arg], "-liftplot") || !strcmp(argv[arg], "-LIFTplot") || !strcmp(argv[arg], "-lift_plot") || !strcmp(argv[arg], "-LIFT_plot") || !strcmp(argv[arg], "-plotlift")) { pr_plot = 0; roc_plot = 0; acc_plot = 0; cost_plot = 0; lift_plot = 1; taken = 1; } if (!strcmp(argv[arg], "-costsplot") || !strcmp(argv[arg], "-costplot") || !strcmp(argv[arg], "-plotcosts") || !strcmp(argv[arg], "-plotcost") || !strcmp(argv[arg], "-cost_plot") || !strcmp(argv[arg], "-costs_plot")) { pr_plot = 0; roc_plot = 0; acc_plot = 0; lift_plot = 0; cost_plot = 1; taken = 1; } if (!strcmp(argv[arg], "-accuracy") || !strcmp(argv[arg], "-accuracyplot") || !strcmp(argv[arg], "-accuracy_plot") || !strcmp(argv[arg], "-ACCplot") || !strcmp(argv[arg], "-ACC_plot") || !strcmp(argv[arg], "-accplot") || !strcmp(argv[arg], "-acc_plot") || !strcmp(argv[arg], "-threshold_plot") || !strcmp(argv[arg], "-thresholdplot")) { acc_plot = 1; roc_plot = 0; lift_plot = 0; taken = 1; } if (!strcmp(argv[arg], "-s") || !strcmp(argv[arg], "-stat") || !strcmp(argv[arg], "-stats")) { stats = 1; taken = 1; } if (!strcmp(argv[arg], "-c") || !strcmp(argv[arg], "-confusion") || !strcmp(argv[arg], "-table")) { confusion = 1; taken = 1; } if (!strcmp(argv[arg], "-t") || !strcmp(argv[arg], "-thresh") || !strcmp(argv[arg], "-threshold")) { thresh = 1; arg++; pred_thresh = atof(argv[arg]); taken = 1; } if (!strcmp(argv[arg], "-cost") || !strcmp(argv[arg], "-costs") || !strcmp(argv[arg], "-matrix")) { costs = 1; stats = 1; arg++; cost_a = atof(argv[arg]); arg++; cost_b = atof(argv[arg]); arg++; cost_c = atof(argv[arg]); arg++; cost_d = atof(argv[arg]); taken = 1; } if (!strcmp(argv[arg], "-ace") || !strcmp(argv[arg], "-ACE")) { ace = 1; taken = 1; } if (!strcmp(argv[arg], "-breyce")) { breyce = 1; taken = 1; } if (!strcmp(argv[arg], "-monti")) { monti = 1; ace = 1; breyce = 1; taken = 1; } if (!strcmp(argv[arg], "-slacq")) { slacq_flag = 1; arg++; slacq_bin_width = atof(argv[arg]); no_bins = floor(eps + 1.0/slacq_bin_width); if ( no_bins > MAX_BINS ) { printf("\nSLAC_Q bin width too small: %8.4lf\n", slacq_bin_width); exit(-1); } arg++; slacq_seed = atoi(argv[arg]); arg++; slacq_trials = atoi(argv[arg]); taken = 1; } if (!strcmp(argv[arg], "-cal")) { cal_flag = 1; arg++; cal_bin_size = atoi(argv[arg]); taken = 1; } if (!strcmp(argv[arg], "-files") && (arg+2 < argc)) { infile = argv[arg+1]; outfile = argv[arg+2]; /* Test files are readable */ if ((f_in = fopen(infile, "r")) == NULL) { fprintf(stderr, "Error: Can't open true values file %s\n", infile); exit(1); } if ((f_out = fopen(outfile, "r")) == NULL) { fprintf(stderr, "Error: Can't open predicted values file %s\n", outfile); exit(1); } arg += 2; taken = 1; } if (!strcmp(argv[arg], "-all") || !strcmp(argv[arg], "-ALL")) { all = 1; stats = 1; arg++; allwacc = atof(argv[arg]); arg++; allwroc = atof(argv[arg]); arg++; allwrms = atof(argv[arg]); taken = 1; } if (!strcmp(argv[arg], "-loss")) { loss = 1; taken = 1; } if (!taken) { printf("\nError: Unrecognized program option %s\n", argv[arg]); print_usage(argv[0]); exit(1); } arg++; } no_item = 0; mean_true = 0.0; mean_pred = 0.0; sse = 0.0; loge = log(2.71828); cxe=0.0; norm_err = 0.0; while (1) { if (infile == NULL) { if ((scanf("%f %f", &true[no_item], &pred[no_item])) == EOF) break; } else { fscanf(f_in, "%f", &true[no_item]); fscanf(f_out, "%f", &pred[no_item]); if (feof(f_in)) { if (!feof(f_out)) fprintf(stderr, "Warning: True values file is longer.\n"); break; } if (feof(f_out)) { fprintf(stderr, "Warning: Predicted values file is longer.\n"); break; } } /* Warn user if we get a negative input but threshold is 0. This is only relevant for some measures (eg accuracy) but its very easy to fall into this trap when using SVM output. FR */ if (pred[no_item] < 0 && warned_user==0 && thresh==0) { fprintf(stderr, "WARNING: Negative prediction but threshold is at default of %f.\n", pred_thresh); warned_user = 1; } /* Note that unlike the rest of the code, the RMSE, norm and Cross-Entropy calculation assumes the targets and predicted values are already scaled. */ cxe+= -true[no_item]*log(pred[no_item]+0.00000001)/loge - (1.0-true[no_item])*log(1.0-pred[no_item]+0.00000001)/loge; //cross-entropy sse+= (true[no_item]-pred[no_item])*(true[no_item]-pred[no_item]); if (nrm) norm_err+= pow(fabs(true[no_item]-pred[no_item]),norm); mean_true+= true[no_item]; mean_pred+= pred[no_item]; if (ace) { /* * modified by S. Monti 2/4/2000 * * ace is computed as follows: ace = - sum_i log p(y_i) * * remove this... * if (pred[no_item] <= 0.0) ace_sum += -9e99; else ace_sum += log(pred[no_item]); * * and replace with following */ if ( true[no_item] ) { ace_sum += ( pred[no_item]<=0.0 ? -9e99 : log(pred[no_item]) ); } else { ace_sum += ( pred[no_item]>=1.0 ? -9e99 : log(1.0-pred[no_item]) ); } } if (breyce) breyce_sum += (true[no_item]-pred[no_item])*(true[no_item]-pred[no_item]); no_item++; if ( no_item >= MAX_ITEMS ) { printf ("Aborting. Exceeded %d items.\n", MAX_ITEMS); exit(1); } } mean_true = mean_true / ((double) no_item); mean_pred = mean_pred / ((double) no_item); rmse = sqrt (sse / ((double) no_item)); if (nrm) norm_err = pow(norm_err/no_item,1/norm); cxe = cxe / ((double) no_item); if (debug) { printf("%d pats read. mean_true %6.4lf. mean_pred %6.4lf\n", no_item, mean_true, mean_pred); fflush(stdout); } total_true_0 = 0; total_true_1 = 0; for (item=0; item max_acc ) { max_acc = acc; max_acc_thresh = threshold; last_acc_thresh = threshold; max_acc_count = 1; } if ( (acc == max_acc) && (threshold != last_acc_thresh) ) { max_acc_count++; last_acc_thresh = threshold; } } */ tt = 0; tf = total_true_1; ft = 0; ff = total_true_0; /* place 1st threshold just above top predicted value */ threshold = pred[no_item-1] + (pred[no_item-1] - pred[no_item-2]) / 2.0; acc = ((double)(tt+ff))/(((double)(no_item)) + eps); max_acc = acc; cost = cost_a*((double)tt) + cost_b*((double)tf) + cost_c*((double)ft) + cost_d*((double)ff); min_cost = cost; if ( acc_plot ) printf ("%lf %lf\n", threshold, acc); if ( cost_plot ) printf ("%lf %lf\n", threshold, cost); for (item=no_item-1; item>-1; item--) { tt+= true[item]; tf-= true[item]; ft+= 1 - true[item]; ff-= 1 - true[item]; if ( item > 0 ) threshold = (pred[item] + pred[item-1]) / 2.0; else /* place last threshold just below bottom predicted value */ threshold = pred[0] - (pred[1] - pred[0]) / 2.0; acc = ((double)(tt+ff))/(((double)(no_item)) + eps); cost = cost_a*((double)tt) + cost_b*((double)tf) + cost_c*((double)ft) + cost_d*((double)ff); if ( acc_plot ) printf ("%lf %lf\n", threshold, acc); if ( cost_plot ) printf ("%lf %lf\n", threshold, cost); if ( acc > max_acc ) { max_acc = acc; max_acc_thresh = threshold; last_acc_thresh = threshold; max_acc_count = 1; } if ( (acc == max_acc) && (threshold != last_acc_thresh) ) { max_acc_count++; last_acc_thresh = threshold; } if ( cost < min_cost ) { min_cost = cost; min_cost_thresh = threshold; last_cost_thresh = threshold; min_cost_count = 1; } if ( (cost == min_cost) && (threshold != last_cost_thresh) ) { min_cost_count++; last_cost_thresh = threshold; } } /* find the prediction threshold such that the predicted number */ /* of 0's and 1's matches the observed number of true 0's and 1's */ freq_thresh = (pred[total_true_0]+pred[total_true_0-1])/2.0; /* now update some statistics using the various thresholds */ a = 0; b = 0; c = 0; d = 0; freq_a = 0; freq_b = 0; freq_c = 0; freq_d = 0; max_acc_a = 0; max_acc_b = 0; max_acc_c = 0; max_acc_d = 0; for (item=0; item= pred_thresh ) a++; else b++; if ( pred[item] >= freq_thresh ) freq_a++; else freq_b++; if ( pred[item] >= max_acc_thresh ) max_acc_a++; else max_acc_b++; } else /* true outcome = 0 */ { if ( pred[item] >= pred_thresh ) c++; else d++; if ( pred[item] >= freq_thresh ) freq_c++; else freq_d++; if ( pred[item] >= max_acc_thresh ) max_acc_c++; else max_acc_d++; } } /* set the threshold to predict x% of the cases to be 1's */ if(prcdata) { data_split=no_item - (int)(no_item*data_prc/100); if(data_split <= 0) pred_thresh = pred[0] - 1.0; else if (data_split >= no_item) pred_thresh = pred[no_item-1]+1; else pred_thresh = (pred[data_split]+pred[data_split-1])/2.0; a=b=c=d=0; item = no_item-1; while (pred[item] > pred_thresh && item >= 0) { if (true[item] == 1) a++; else c++; item--; } no_remaining = item - data_split + 1; cnt = onecnt = 0; while (pred[item] == pred_thresh && item >= 0) // I think this is safe { cnt++; if (true[item] == 1) onecnt++; item--; } if (cnt) { printf("Warning: there are %d ties at %6.4lf%% of the data\n",cnt,data_prc);fflush(stdout); a+= 1.0*no_remaining*onecnt/cnt; c+= 1.0*no_remaining*(cnt-onecnt)/cnt; b+= 1.0*(cnt-no_remaining)*onecnt/cnt; d+= 1.0*(cnt-no_remaining)*(cnt-onecnt)/cnt; } while (item >= 0) { if (true[item] == 1) b++; else d++; item--; } } /* now let's do the ROC curve and area */ tt = 0; tf = total_true_1; ft = 0; ff = total_true_0; sens = ((double) tt) / ((double) (tt+tf)); spec = ((double) ff) / ((double) (ft+ff)); tpf = sens; fpf = 1.0 - spec; if ( roc_plot ) printf ("%6.4lf %6.4lf\n", fpf, tpf); roc_area = 0.0; tpf_prev = tpf; fpf_prev = fpf; for (item=no_item-1; item>-1; item--) { tt+= true[item]; tf-= true[item]; ft+= 1 - true[item]; ff-= 1 - true[item]; sens = ((double) tt) / ((double) (tt+tf)); spec = ((double) ff) / ((double) (ft+ff)); tpf = sens; fpf = 1.0 - spec; if ( item > 0 ) if ( pred[item] != pred[item-1] ) { if ( roc_plot ) printf ("%6.4lf %6.4lf\n", fpf, tpf); roc_area+= 0.5*(tpf+tpf_prev)*(fpf-fpf_prev); /* printf ("0.5*(%6.4lf+%6.4lf)*(%6.4lf-%6.4lf) = 0.5*(%6.4lf)*(%6.4lf) = %6.4lf %6.4lf\n", tpf,tpf_prev,fpf,fpf_prev,tpf+tpf_prev,fpf-fpf_prev,0.5*(tpf+tpf_prev)*(fpf-fpf_prev),roc_area); */ tpf_prev = tpf; fpf_prev = fpf; } if ( item == 0 ) { if ( roc_plot ) printf ("%6.4lf %6.4lf\n", fpf, tpf); roc_area+= 0.5*(tpf+tpf_prev)*(fpf-fpf_prev); /* printf ("0.5*(%6.4lf+%6.4lf)*(%6.4lf-%6.4lf) = 0.5*(%6.4lf)*(%6.4lf) = %6.4lf %6.4lf\n", tpf,tpf_prev,fpf,fpf_prev,tpf+tpf_prev,fpf-fpf_prev,0.5*(tpf+tpf_prev)*(fpf-fpf_prev),roc_area); */ } } /* now let's do the PRECISION/RECALL curve and F */ /* while we're at it, do the lift curve as well */ tt = 0; tf = total_true_1; ft = 0; ff = total_true_0; if ((tt+ft) > 0.0) precision = ((double) tt) / ((double) (tt+ft)); else precision = -1; recall = ((double) tt) / ((double) (tt+tf)); precision_prev = precision; recall_prev = recall; if ( pr_plot && precision >= 0.0 ) printf ("%6.4lf %6.4lf\n", precision, recall); for (item=no_item-1; item>-1; item--) { tt+= true[item]; tf-= true[item]; ft+= 1 - true[item]; ff-= 1 - true[item]; if ( ( item > 0 && pred[item] != pred[item-1] ) || item == 0 ) { if ((tt+ft) > 0.0) precision = ((double) tt) / ((double) (tt+ft)); else precision = -1; recall = ((double) tt) / ((double) (tt+tf)); /* if ( precision == recall ) pr_break_even = precision; if ( precision < recall && precision_prev > recall_prev ) */ /* pr_break_even == (precision + recall + precision_prev + recall_prev)/4.0; average is easy, but linear interpolation probably is better. see next. */ /* if (recall != recall_prev) pr_break_even = (precision - (precision_prev - precision)/(recall_prev - recall)*recall)/(1.0 - (precision_prev - precision)/(recall_prev - recall)); else pr_break_even = recall; */ precision_prev = precision; recall_prev = recall; if ( pr_plot && precision >= 0.0 ) printf ("%6.4lf %6.4lf\n", precision, recall); frac_above_threshold = ((double) (tt + ft)) / ((double) (total_true_1 + total_true_0)); lift = (((double) tt) / ((double) total_true_1) + eps) / (frac_above_threshold + eps); if ( lift_plot ) printf ("%6.4lf %6.4lf\n", frac_above_threshold, lift); } } max_lift = (total_true_1 + total_true_0) / (total_true_1 + eps); min_lift = 0.0; /* now do mean average PRECISION and find the BREAK_EVEN point */ /* note: the approach followed here may not be standard when it comes to ties */ /* note: the approach followed here computes the area under the precision/recall curve, not just the average precision at recall = 0,.1,.2,..,1.0 */ tt = 0; tf = total_true_1; ft = 0; ff = total_true_0; apr = 0.0; precision = -1; recall = 0.0; precision_prev = precision; recall_prev = recall; cnt = 0; onecnt = 0; lasttt=tt; lasttf=tf; lastft=ft; lastff=ff; for (item=no_item-1; item>-1; item--) { cnt++; if ( true[item] == 1) onecnt++; tt+= true[item]; tf-= true[item]; ft+= 1 - true[item]; ff-= 1 - true[item]; if ( ( item > 0 && pred[item] != pred[item-1] ) || item == 0 ) { prcones = ((double)onecnt/cnt); for ( i=1;i<=cnt;i++) { precision = (lasttt+i*prcones)/(lasttt+lastft+i); recall = (lasttt+i*prcones)/total_true_1; if ( precision_prev > 0.0) apr += 0.5*(precision+precision_prev)*(recall-recall_prev); if ( precision == recall && pr_break_even == 0 ) pr_break_even = precision; if ( precision < recall && precision_prev > recall_prev && pr_break_even == 0 ) /* pr_break_even == (precision + recall + precision_prev + recall_prev)/4.0; average is easy, but linear interpolation probably is better. see next. */ { if (recall != recall_prev) pr_break_even = (precision - (precision_prev - precision)/(recall_prev - recall)*recall)/(1.0 - (precision_prev - precision)/(recall_prev - recall)); else pr_break_even = recall; } precision_prev = precision; recall_prev = recall; /* if ( pr_plot && precision >= 0.0 ) printf ("%6.4lf %6.4lf\n", precision, recall); */ } cnt = 0; onecnt = 0; lasttt=tt; lasttf=tf; lastft=ft; lastff=ff; } } /* calculate SLAC's Q-score */ /* slacq_window = 0.0; */ /* slac_q = 0.0; */ /* n_tot = 0.0; */ /* n_cor = 0.0; */ /* last_p = pred[0]; */ /* for (item=0; item= (0.5 + 0.5*slacq_window) ) */ /* { */ /* n_cor+= true[item]; */ /* n_tot+= 1.0; */ /* } */ /* if ( pred[item] < (0.5 - 0.5*slacq_window) ) */ /* { */ /* n_cor+= 1.0-true[item]; */ /* n_tot+= 1.0; */ /* } */ /* /\* printf ("%d %lf %lf %lf %lf %lf %lf\n", item, true[item], pred[item], n_cor, n_tot, slac_w, slac_q); *\/ */ /* } */ /* else */ /* { */ /* last_p = pred[item]; */ /* if ( n_tot > 0.0 ) */ /* { */ /* slac_w = 1.0 - (n_cor/n_tot); */ /* slac_q+= n_tot * (1.0 - 2.0*slac_w) * (1.0 - 2.0*slac_w); */ /* } */ /* n_tot = 0.0; */ /* n_cor = 0.0; */ /* if ( pred[item] >= (0.5 + 0.5*slacq_window) ) */ /* { */ /* n_cor+= true[item]; */ /* n_tot+= 1.0; */ /* } */ /* if ( pred[item] < (0.5 - 0.5*slacq_window) ) */ /* { */ /* n_cor+= 1.0-true[item]; */ /* n_tot+= 1.0; */ /* } */ /* /\* printf ("%d %lf %lf %lf %lf %lf %lf\n", item, true[item], pred[item], n_cor, n_tot, slac_w, slac_q); *\/ */ /* } */ /* } */ /* if ( n_tot > 0.0 ) */ /* { */ /* slac_w = 1.0 - (n_cor/n_tot); */ /* slac_q+= n_tot * (1.0 - 2.0*slac_w) * (1.0 - 2.0*slac_w); */ /* } */ /* calculate improved SLAC Q-score using two test set samples and evenly spaced bins */ /* pb is predicted b, pbb is predicted bbar, tb is true b, tbb is true bbar, ... */ slac_q_sum = 0.0; slac_q_sumsquares = 0.0; srandom(slacq_seed); for (trial=0; trial 0.5 ) slacq_tb[set][bin]++; else slacq_tbb[set][bin]++; } slac_q = 0.0; slac_qn = 0; for (bin=0; bin slacq_tbb[0][bin] ) slac_w = 1.0 - ( ((double)slacq_tb[1][bin]) / ( ((double)(slacq_tb[1][bin]+slacq_tbb[1][bin])) + eps) ); else slac_w = 1.0 - ( ((double)slacq_tbb[1][bin]) / ( ((double)(slacq_tb[1][bin]+slacq_tbb[1][bin])) + eps) ); slac_q += ((double)(slacq_tb[1][bin]+slacq_tbb[1][bin])) * (1.0 - 2.0*slac_w) * (1.0 - 2.0*slac_w); slac_qn += slacq_tb[1][bin]+slacq_tbb[1][bin]; } slac_q = slac_q / ((double)slac_qn); slac_q_sum += slac_q; slac_q_sumsquares += slac_q*slac_q; } slac_q_mean = slac_q_sum / ((double)slacq_trials); if ( slacq_trials > 1 ) slac_q_sdev = sqrt(slac_q_sumsquares/((double)slacq_trials) - slac_q_mean*slac_q_mean); else slac_q_sdev = 0.0; /* calculate mean absolute calibration error for bins with cal_bin_size points in them */ if ( cal_flag ) { cal_sum = 0.0; cal_sum_n = 0; for (i=0; i<19; i++) { plow = i * 0.05; phigh = plow + 0.1; cal_f = 0; cal_t = 0; sum_p = 0.0; for (item=0; item= plow && pred[item] <= phigh ) { if ( true[item] == 1 ) cal_t++; else cal_f++; sum_p += pred[item]; } obs_p = ((double) cal_t) / (((double) (cal_t + cal_f)) + eps); cal_err = fabs(obs_p - (sum_p / (((double) (cal_t + cal_f)) + eps))); cal_sum += cal_err; cal_sumsquares += cal_err*cal_err; cal_sum_n++; } cal_mean_err = cal_sum / ((double) cal_sum_n); } /* cal_sum = 0.0; */ /* cal_sum_n = 0; */ /* cal_sumsquares = 0.0; */ /* cal_f = 0; */ /* cal_t = 0; */ /* sum_p = 0.0; */ /* for (item=0; item= (cal_bin_size - 1)) && (item < (no_item - cal_bin_size)) ) */ /* { */ /* obs_p = ((double) cal_t) / ((double) (cal_t + cal_f)); */ /* cal_err = fabs(obs_p - (sum_p / ((double) cal_bin_size))); */ /* cal_sum += cal_err; */ /* cal_sumsquares += cal_err*cal_err; */ /* cal_sum_n++; */ /* if ( true[item - cal_bin_size + 1] > 0.5 ) */ /* cal_t--; */ /* else */ /* cal_f--; */ /* sum_p -= pred[item - cal_bin_size + 1]; */ /* } */ /* } */ /* cal_mean_err = cal_sum / ((double) cal_sum_n); */ /* } */ if ( (stats || area) && (roc_plot || acc_plot) ) printf ("\n"); if (debug) printf ("%f %f %f %f\n", a,b,c,d); if ( loss == 0 ) { if ( stats || confusion ) { printf ("ACC %8.5lf pred_thresh %9.6lf\n", ((double) (a+d)) / (((double) (a+b+c+d))+eps), pred_thresh); printf ("PPV %8.5lf pred_thresh %9.6lf\n", ((double) (a)) / (((double) (a+c))+eps), pred_thresh); printf ("NPV %8.5lf pred_thresh %9.6lf\n", ((double) (d)) / (((double) (b+d))+eps), pred_thresh); printf ("SEN %8.5lf pred_thresh %9.6lf\n", ((double) (a)) / (((double) (a+b))+eps), pred_thresh); printf ("SPC %8.5lf pred_thresh %9.6lf\n", ((double) (d)) / (((double) (c+d))+eps), pred_thresh); precision = ((double) (a)) / (((double) (a+c))+eps); recall = ((double) (a)) / (((double) (a+b))+eps); printf ("PRE %8.5lf pred_thresh %9.6lf\n", precision, pred_thresh); printf ("REC %8.5lf pred_thresh %9.6lf\n", recall, pred_thresh); printf ("PRF %8.5lf pred_thresh %9.6lf\n", (2.0 * precision * recall)/(precision+recall), pred_thresh); printf ("LFT %8.5lf pred_thresh %9.6lf\n", (((double) a) / ((double) total_true_1)) / (((double) (a + c)) / ((double) (total_true_1 + total_true_0)) + eps), pred_thresh); if ( all ) printf ("ALL %8.5lf pred_thresh %9.6lf wacc %9.6lf wroc %9.6lf wrms %9.6lf \n", allwacc*((double) (a+d)) / (((double) (a+b+c+d))+eps) + allwroc*roc_area + allwrms*(1.0-rmse), pred_thresh, allwacc, allwroc, allwrms); if ( costs ) printf ("CST %7.2lf pred_thresh %9.6lf\n", (cost_a*((double)a) + cost_b*((double)b) + cost_c*((double)c) + cost_d*((double)d)), pred_thresh); if ( confusion ) print_confusion_table (a,b,c,d,pred_thresh); printf ("\n"); if (debug) printf ("%f %f %f %f\n", freq_a,freq_b,freq_c,freq_d); printf ("ACC %8.5lf freq_thresh %9.6lf\n", ((double) (freq_a+freq_d)) / (((double) (freq_a+freq_b+freq_c+freq_d))+eps), freq_thresh); printf ("PPV %8.5lf freq_thresh %9.6lf\n", ((double) (freq_a)) / (((double) (freq_a+freq_c))+eps), freq_thresh); printf ("NPV %8.5lf freq_thresh %9.6lf\n", ((double) (freq_d)) / (((double) (freq_b+freq_d))+eps), freq_thresh); printf ("SEN %8.5lf freq_thresh %9.6lf\n", ((double) (freq_a)) / (((double) (freq_a+freq_b))+eps), freq_thresh); printf ("SPC %8.5lf freq_thresh %9.6lf\n", ((double) (freq_d)) / (((double) (freq_c+freq_d))+eps), freq_thresh); precision = ((double) (freq_a)) / (((double) (freq_a+freq_c))+eps); recall = ((double) (freq_a)) / (((double) (freq_a+freq_b))+eps); printf ("PRE %8.5lf freq_thresh %9.6lf\n", precision, freq_thresh); printf ("REC %8.5lf freq_thresh %9.6lf\n", recall, freq_thresh); printf ("PRF %8.5lf freq_thresh %9.6lf\n", (2.0 * precision * recall)/(precision+recall), freq_thresh); printf ("LFT %8.5lf freq_thresh %9.6lf\n", (((double) freq_a) / ((double) total_true_1)) / (((double) (freq_a + freq_c)) / ((double) (total_true_1 + total_true_0)) + eps), freq_thresh); if ( all ) printf ("ALL %8.5lf freq_thresh %9.6lf wacc %9.6lf wroc %9.6lf wrms %9.6lf \n", allwacc*((double) (freq_a+freq_d)) / (((double) (freq_a+freq_b+freq_c+freq_d))+eps) + allwroc*roc_area + allwrms*(1.0-rmse), freq_thresh, allwacc, allwroc, allwrms); if ( costs ) printf ("CST %7.2lf freq_thresh %9.6lf\n", (cost_a*((double)freq_a) + cost_b*((double)freq_b) + cost_c*((double)freq_c) + cost_d*((double)freq_d)), freq_thresh); if ( confusion ) print_confusion_table (freq_a,freq_b,freq_c,freq_d,freq_thresh); printf ("\n"); if (debug) printf ("%f %f %f %f\n", max_acc_a,max_acc_b,max_acc_c,max_acc_d); if (max_acc_count > 1) printf ("Caution: %d Different Thresholds Achieved Max Accuracy.\nRun with -accplot option to see accuracy vs. threshold.\n\n", max_acc_count); printf ("ACC %8.5lf max_acc_thresh %9.6lf\n", ((double) (max_acc_a+max_acc_d)) / (((double) (max_acc_a+max_acc_b+max_acc_c+max_acc_d))+eps), max_acc_thresh); printf ("PPV %8.5lf max_acc_thresh %9.6lf\n", ((double) (max_acc_a)) / (((double) (max_acc_a+max_acc_c))+eps), max_acc_thresh); printf ("NPV %8.5lf max_acc_thresh %9.6lf\n", ((double) (max_acc_d)) / (((double) (max_acc_b+max_acc_d))+eps), max_acc_thresh); printf ("SEN %8.5lf max_acc_thresh %9.6lf\n", ((double) (max_acc_a)) / (((double) (max_acc_a+max_acc_b))+eps), max_acc_thresh); printf ("SPC %8.5lf max_acc_thresh %9.6lf\n", ((double) (max_acc_d)) / (((double) (max_acc_c+max_acc_d))+eps), max_acc_thresh); precision = ((double) (max_acc_a)) / (((double) (max_acc_a+max_acc_c))+eps); recall = ((double) (max_acc_a)) / (((double) (max_acc_a+max_acc_b))+eps); printf ("PRE %8.5lf max_acc_thresh %9.6lf\n", precision, max_acc_thresh); printf ("REC %8.5lf max_acc_thresh %9.6lf\n", recall, max_acc_thresh); printf ("PRF %8.5lf max_acc_thresh %9.6lf\n", (2.0 * precision * recall)/(precision+recall), max_acc_thresh); printf ("LFT %8.5lf max_acc_thresh %9.6lf\n", (((double) max_acc_a) / ((double) total_true_1)) / (((double) (max_acc_a + max_acc_c)) / ((double) (total_true_1 + total_true_0)) + eps), max_acc_thresh); if ( all ) printf ("ALL %8.5lf max_acc_thresh %9.6lf wacc %9.6lf wroc %9.6lf wrms %9.6lf \n", allwacc*((double) (max_acc_a+max_acc_d)) / (((double) (max_acc_a+max_acc_b+max_acc_c+max_acc_d))+eps) + allwroc*roc_area + allwrms*(1.0-rmse), max_acc_thresh, allwacc, allwroc, allwrms); if ( costs ) printf ("CST %7.2lf max_acc_thresh %9.6lf\n", (cost_a*((double)max_acc_a) + cost_b*((double)max_acc_b) + cost_c*((double)max_acc_c) + cost_d*((double)max_acc_d)), max_acc_thresh); if ( confusion ) print_confusion_table (max_acc_a,max_acc_b,max_acc_c,max_acc_d,max_acc_thresh); printf ("\n"); } if ( monti && !stats ) { printf ("ACC %8.5lf pred_thresh %9.6lf\n", ((double) (a+d)) / (((double) (a+b+c+d))+eps), pred_thresh); printf ("ACC %8.5lf freq_thresh %9.6lf\n", ((double) (freq_a+freq_d)) / (((double) (freq_a+freq_b+freq_c+freq_d))+eps), freq_thresh); if (max_acc_count > 1) printf ("Caution: %d Different Thresholds Achieved Max Accuracy.\nRun with -accplot option to see accuracy vs. threshold.\n\n", max_acc_count); printf ("ACC %8.5lf max_acc_thresh %9.6lf\n", ((double) (max_acc_a+max_acc_d)) / (((double) (max_acc_a+max_acc_b+max_acc_c+max_acc_d))+eps), max_acc_thresh); } if ( ace ) printf ("ACE %8.5lf\n", -1.0 * ace_sum / ((double) no_item)); if ( breyce ) printf ("BRE %8.5lf\n", breyce_sum / ((double) no_item)); if ( (ace || breyce) && area && !monti ) printf ("\n"); if ( break_even || stats ) printf ("PRB %8.5lf\n", pr_break_even); if ( stats ) printf ("APR %8.5lf\n", apr); if ( nrm ) printf ("NRM %8.5lf\n", norm_err); if ( easy ) { printf ("ACC %8.5lf\n", ((double) (a+d)) / (((double) (a+b+c+d))+eps)); } if ( easy||stats ) printf ("RMS %8.5lf\n", rmse); if ( stats ) printf ("CXE %8.5lf\n",cxe); if ( area ) printf ("ROC %8.5lf\n", roc_area); if ( costs ) printf ("MIN_COST %8.2lf Threshold %9.6lf\n", min_cost, min_cost_thresh); if ( slacq_flag ) printf ("SLQ %8.5lf SDEV %9.6lf Bin_Width %9.6lf\n", slac_q_mean, slac_q_sdev, slacq_bin_width); if ( cal_flag ) printf ("CAL %8.5lf Bin_Size %d\n", cal_mean_err, cal_bin_size); } if ( loss == 1 ) { if ( stats || confusion ) { printf ("ACC %8.5lf pred_thresh %9.6lf\n", 1.0 - (((double) (a+d)) / (((double) (a+b+c+d))+eps)), pred_thresh); printf ("PPV %8.5lf pred_thresh %9.6lf\n", 1.0 - (((double) (a)) / (((double) (a+c))+eps)), pred_thresh); printf ("NPV %8.5lf pred_thresh %9.6lf\n", 1.0 - (((double) (d)) / (((double) (b+d))+eps)), pred_thresh); printf ("SEN %8.5lf pred_thresh %9.6lf\n", 1.0 - (((double) (a)) / (((double) (a+b))+eps)), pred_thresh); printf ("SPC %8.5lf pred_thresh %9.6lf\n", 1.0 - (((double) (d)) / (((double) (c+d))+eps)), pred_thresh); precision = ((double) (a)) / (((double) (a+c))+eps); recall = ((double) (a)) / (((double) (a+b))+eps); printf ("PRE %8.5lf pred_thresh %9.6lf\n", 1.0 - precision, pred_thresh); printf ("REC %8.5lf pred_thresh %9.6lf\n", 1.0 - recall, pred_thresh); printf ("PRF %8.5lf pred_thresh %9.6lf\n", 1.0 - ((2.0 * precision * recall)/(precision+recall)), pred_thresh); printf ("LFT %8.5lf pred_thresh %9.6lf\n", (max_lift - (((double) a) / ((double) total_true_1)) / (((double) (a + c)) / ((double) (total_true_1 + total_true_0)) + eps))/(max_lift-min_lift+eps), pred_thresh); if ( all ) printf ("ALL %8.5lf pred_thresh %9.6lf wacc %9.6lf wroc %9.6lf wrms %9.6lf \n", 1.0 - (allwacc*((double) (a+d)) / (((double) (a+b+c+d))+eps) + allwroc*roc_area + allwrms*(1.0-rmse)), pred_thresh, allwacc, allwroc, allwrms); if ( costs ) printf ("CST %7.2lf pred_thresh %9.6lf\n", (cost_a*((double)a) + cost_b*((double)b) + cost_c*((double)c) + cost_d*((double)d)), pred_thresh); if ( confusion ) print_confusion_table (a,b,c,d,pred_thresh); printf ("\n"); if (debug) printf ("%f %f %f %f\n", freq_a,freq_b,freq_c,freq_d); printf ("ACC %8.5lf freq_thresh %9.6lf\n", 1.0 - (((double) (freq_a+freq_d)) / (((double) (freq_a+freq_b+freq_c+freq_d))+eps)), freq_thresh); printf ("PPV %8.5lf freq_thresh %9.6lf\n", 1.0 - (((double) (freq_a)) / (((double) (freq_a+freq_c))+eps)), freq_thresh); printf ("NPV %8.5lf freq_thresh %9.6lf\n", 1.0 - (((double) (freq_d)) / (((double) (freq_b+freq_d))+eps)), freq_thresh); printf ("SEN %8.5lf freq_thresh %9.6lf\n", 1.0 - (((double) (freq_a)) / (((double) (freq_a+freq_b))+eps)), freq_thresh); printf ("SPC %8.5lf freq_thresh %9.6lf\n", 1.0 - (((double) (freq_d)) / (((double) (freq_c+freq_d))+eps)), freq_thresh); precision = ((double) (freq_a)) / (((double) (freq_a+freq_c))+eps); recall = ((double) (freq_a)) / (((double) (freq_a+freq_b))+eps); printf ("PRE %8.5lf freq_thresh %9.6lf\n", 1.0 - precision, freq_thresh); printf ("REC %8.5lf freq_thresh %9.6lf\n", 1.0 - recall, freq_thresh); printf ("PRF %8.5lf freq_thresh %9.6lf\n", 1.0 - ((2.0 * precision * recall)/(precision+recall)), freq_thresh); printf ("LFT %8.5lf freq_thresh %9.6lf\n", (max_lift - (((double) freq_a) / ((double) total_true_1)) / (((double) (freq_a + freq_c)) / ((double) (total_true_1 + total_true_0)) + eps))/(max_lift-min_lift+eps), freq_thresh); if ( all ) printf ("ALL %8.5lf freq_thresh %9.6lf wacc %9.6lf wroc %9.6lf wrms %9.6lf \n", 1.0 - (allwacc*((double) (freq_a+freq_d)) / (((double) (freq_a+freq_b+freq_c+freq_d))+eps) + allwroc*roc_area + allwrms*(1.0-rmse)), freq_thresh, allwacc, allwroc, allwrms); if ( costs ) printf ("CST %7.2lf freq_thresh %9.6lf\n", (cost_a*((double)freq_a) + cost_b*((double)freq_b) + cost_c*((double)freq_c) + cost_d*((double)freq_d)), freq_thresh); if ( confusion ) print_confusion_table (freq_a,freq_b,freq_c,freq_d,freq_thresh); printf ("\n"); if (debug) printf ("%f %f %f %f\n", max_acc_a,max_acc_b,max_acc_c,max_acc_d); if (max_acc_count > 1) printf ("Caution: %d Different Thresholds Achieved Max Accuracy.\nRun with -accplot option to see accuracy vs. threshold.\n\n", max_acc_count); printf ("ACC %8.5lf max_acc_thresh %9.6lf\n", 1.0 - (((double) (max_acc_a+max_acc_d)) / (((double) (max_acc_a+max_acc_b+max_acc_c+max_acc_d))+eps)), max_acc_thresh); printf ("PPV %8.5lf max_acc_thresh %9.6lf\n", 1.0 - (((double) (max_acc_a)) / (((double) (max_acc_a+max_acc_c))+eps)), max_acc_thresh); printf ("NPV %8.5lf max_acc_thresh %9.6lf\n", 1.0 - (((double) (max_acc_d)) / (((double) (max_acc_b+max_acc_d))+eps)), max_acc_thresh); printf ("SEN %8.5lf max_acc_thresh %9.6lf\n", 1.0 - (((double) (max_acc_a)) / (((double) (max_acc_a+max_acc_b))+eps)), max_acc_thresh); printf ("SPC %8.5lf max_acc_thresh %9.6lf\n", 1.0 - (((double) (max_acc_d)) / (((double) (max_acc_c+max_acc_d))+eps)), max_acc_thresh); precision = ((double) (max_acc_a)) / (((double) (max_acc_a+max_acc_c))+eps); recall = ((double) (max_acc_a)) / (((double) (max_acc_a+max_acc_b))+eps); printf ("PRE %8.5lf max_acc_thresh %9.6lf\n", 1.0 - precision, max_acc_thresh); printf ("REC %8.5lf max_acc_thresh %9.6lf\n", 1.0 - recall, max_acc_thresh); printf ("PRF %8.5lf max_acc_thresh %9.6lf\n", 1.0 - ((2.0 * precision * recall)/(precision+recall)), max_acc_thresh); printf ("LFT %8.5lf max_acc_thresh %9.6lf\n", (max_lift - (((double) max_acc_a) / ((double) total_true_1)) / (((double) (max_acc_a + max_acc_c)) / ((double) (total_true_1 + total_true_0)) + eps))/(max_lift-min_lift+eps), max_acc_thresh); if ( all ) printf ("ALL %8.5lf max_acc_thresh %9.6lf wacc %9.6lf wroc %9.6lf wrms %9.6lf \n", 1.0 - (allwacc*((double) (max_acc_a+max_acc_d)) / (((double) (max_acc_a+max_acc_b+max_acc_c+max_acc_d))+eps) + allwroc*roc_area + allwrms*(1.0-rmse)), max_acc_thresh, allwacc, allwroc, allwrms); if ( costs ) printf ("CST %7.2lf max_acc_thresh %9.6lf\n", (cost_a*((double)max_acc_a) + cost_b*((double)max_acc_b) + cost_c*((double)max_acc_c) + cost_d*((double)max_acc_d)), max_acc_thresh); if ( confusion ) print_confusion_table (max_acc_a,max_acc_b,max_acc_c,max_acc_d,max_acc_thresh); printf ("\n"); } if ( monti && !stats ) { printf ("ACC %8.5lf pred_thresh %9.6lf\n", 1.0 - (((double) (a+d)) / (((double) (a+b+c+d))+eps)), pred_thresh); printf ("ACC %8.5lf freq_thresh %9.6lf\n", 1.0 - (((double) (freq_a+freq_d)) / (((double) (freq_a+freq_b+freq_c+freq_d))+eps)), freq_thresh); if (max_acc_count > 1) printf ("Caution: %d Different Thresholds Achieved Max Accuracy.\nRun with -accplot option to see accuracy vs. threshold.\n\n", max_acc_count); printf ("ACC %8.5lf max_acc_thresh %9.6lf\n", 1.0 - (((double) (max_acc_a+max_acc_d)) / (((double) (max_acc_a+max_acc_b+max_acc_c+max_acc_d))+eps)), max_acc_thresh); } if ( ace ) printf ("ACE %8.5lf\n", -1.0 * ace_sum / ((double) no_item)); if ( breyce ) printf ("BRE %8.5lf\n", breyce_sum / ((double) no_item)); if ( (ace || breyce) && area && !monti ) printf ("\n"); if ( break_even || stats ) printf ("PRB %8.5lf\n", 1.0 - pr_break_even); if ( stats ) printf ("APR %8.5lf\n", 1.0 - apr); if ( nrm ) printf ("NRM %8.5lf\n", norm_err); if ( easy ) { printf ("ACC %8.5lf\n", 1.0 - (((double) (a+d)) / (((double) (a+b+c+d))+eps))); } if ( easy || stats ) printf ("RMS %8.5lf\n", rmse); if ( stats ) printf ("CXE %8.5lf\n",cxe); if ( area ) printf ("ROC %8.5lf\n", 1.0 - roc_area); if ( costs ) printf ("MIN_COST %8.2lf Threshold %9.6lf\n", min_cost, min_cost_thresh); if ( slacq_flag ) printf ("SLQ %8.5lf SDEV %9.6lf Bin_Width %9.6lf\n", 1.0-slac_q_mean, slac_q_sdev, slacq_bin_width); if ( cal_flag ) printf ("CAL %8.5lf Bin_Size %d\n", cal_mean_err, cal_bin_size); } return 0; }