#! /usr/bin/perl -wT

# file: score			B. Moody	8 February 2015
#				Last revised:	1 February 2017

# Challenge 2017 evaluation, stage 4:  calculate scores

use strict;
$ENV{PATH} = '/usr/local/bin:/usr/bin:/bin';

if (-f 'results/exam-deferred') {
  print "Entry accepted for scoring.\n";
  print "Scores will be available after\n";
  print "the end of the Official Phase.\n";
  exit 0;
}

my $scriptname = quotemeta $0;
chomp (my $CHALLENGE = qx(dirname `which $scriptname`));
my $DATA = "$CHALLENGE/data";
my $REFERENCE = "$DATA/test/REFERENCE.csv";
my $SUBSET = "$DATA/test/RECORDS-subset-phase1";
my $TITLE = 'Scores (phase1 subset)';

my %user_answers;
while (my $subdir = <results/exam*>) {
  if (opendir D, $subdir) {
    while ($_ = readdir D) {
      if (/(\w+).txt/) {
        my $record = $1;
        if (open A, "$subdir/$record.txt") {
          $_ = <A>;
          s/[\r\n]//g;
          if (/^[^,]*,([NAO~])$/) {
            $user_answers{$record} = $1;
          }
          close A;
        }
      }
    }
    closedir D;
  }
}

my %records_to_score;
open L, "$SUBSET" or die "cannot read $SUBSET: $!";
while (<L>) {
  chomp;
  $records_to_score{$_} = 1;
}

my %count;
my $n_missing = 0;

open A, $REFERENCE or die "cannot read $REFERENCE: $!";
while (<A>) {
  s/[\r\n]//g;
  my ($record, $real_answer) = split /,/;
  my $user_answer = $user_answers{$record};
  if (!defined $user_answer) {
    $user_answer = '~';
    $n_missing++;
  }
  if ($records_to_score{$record}) {
    $count{$real_answer}->{$user_answer}++;
    $count{$real_answer}->{'*'}++;
    $count{'*'}->{$user_answer}++;
  }
}
close A;

if ($n_missing > 0) {
  print "Warning: Program did not produce output for $n_missing records\n";
  print "  (these will be scored as 'noisy' records.)\n";
  print "\n";
}

my @categories = qw(N A O ~);
my %category_label = ('N' => 'Normal',
                      'A' => 'AF',
                      'O' => 'Other',
                      '~' => 'Noisy');

my %F1;
my $overall = 0;
foreach my $cat (@categories) {
  my $n_correct   = $count{$cat}->{$cat} // 0;
  my $n_reference = $count{$cat}->{'*'} // 0;
  my $n_labelled  = $count{'*'}->{$cat} // 0;

  $F1{$cat} = (2 * $n_correct) / ($n_reference + $n_labelled);
  $overall += $F1{$cat} / @categories;
}

printf "$TITLE:\n";
print "                 F1\n";
print "-------------------\n";
foreach my $cat (@categories) {
  printf "\%-12s \%6.2f\n", $category_label{$cat}, $F1{$cat};
}
print "-------------------\n";
printf "\%-12s \%6.2f\n", 'Overall', $overall;
