#!/usr/bin/perl
# file: pbsearch		G. Moody	24 February 2012
#				Last revised:	  13 March 2012
# _____________________________________________________________________________
# PhysioBank CGI search client
#
# This program provides a web interface for searching in PhysioBank.  It
# collects user input, constructs queries for the PhysioBank Simple Query
# Server (pbsqs), collects the server's replies, and makes them available to
# the user.

use IO::Socket;
use CGI qw/:standard/;
use CGI::Carp 'fatalsToBrowser';

my $atmurl = '';  # if set, http://$host/cgi-bin/ATM...
my $bindir = "/usr/local/bin/";
my $pbsdir = "/home/physionet/html/physiobank/database/pbs/";
my $rec_all= "/home/physionet/html/physiobank/database/RECORDS-ALL";
my @digits = split(//,"ABCDEFGHJKLMNPQRSTUVWXYZ");
my $help_on;
my $id;      # value of pbs_id cookie
my $utmp   = "/atm/pbs";
my $dtmp   = "/ptmp$utmp"; # directory for files of query results
my $ofdir;   # $dtmp/$id
my $ofurl;   # $utmp/$id
my $ofname;  # $ofdir/$tag
my $qfname;  # $ofdir/index.html
my $tfname;  # $ofdir/index.html~
my $tag;     # A, B, C ...

pbs();

sub pbs {
    read_param();
    banner();
    if ($atmurl) {
	print "<pre>Attempted unsucessfully to redirect to $atmurl</pre>";
	$atmurl = '';
    }
    show_sq_form();
    if ($sq_action eq 'Get List') { run_simple_query(); }
    if ($res_action) { run_compound_query(); }
    if ($rescount >= 0) { show_results(); }
    if ($help_on eq 'on') { show_help(); }
    footer();
}

# read_param: read state variables
sub read_param {
    if (!param) {
	$rescount = -1;
    }
    else {
	$res_action = param('res_action');
	@qrlist = param('qrlist');
	if ($res_action eq 'Choose') {
	    if (scalar(@qrlist) == 1) {
		$host = server_name();
		$atmurl = "http://$host/cgi-bin/ATM?database=$qrlist[0]";
		print redirect(-url=>"$atmurl");
	    }
	}
	$subject = param('subject');
	$comp_op = param('comp_op'); 
	$sval = param('sval');
	$name_num = param('name_num');
	$help_on = param('help_on');
    }
}

# banner: start HTML output, read/set cookie, and print the PhysioBank banner
sub banner {
    my $title='PhysioBank Search';
    my %tags = ( 'http-equiv', 'Content-Type',
		 'content', 'text/html',
		 'charset', 'utf-8' );
    $id = cookie('pbs_id');
    $res_action = param('res_action');
    print header;
    show_html("doctype");
    if (!$id) { # this user hasn't been here recently -- no previous session
	$id = set_pbs_id();
    }
    elsif ($res_action eq 'Restore previous session') {
	$rescount = 0;
	param(-name=>'id', -value=>$id);
	refresh_cookie();
    }
    elsif (param('id')) {  # this is part of an active session
	$sq_action = param('sq_action');
	if ($sq_action eq 'Discard previous results') { $id = set_pbs_id(); }
	refresh_cookie();
    }
    else { # this is the beginning of a new session following a previous one
	$sq_action = '';
    }
    $ofurl = "$utmp/$id";
    $ofdir = "$dtmp/$id";
    mkdir($ofdir);
    if (@qrlist) { $qrl = "$ofdir/" . join " $ofdir/", @qrlist; }
    $qfname = $ofdir . "/index.html"; # index of files of query results
    $tfname = $qfname . "~"; # previous version of $qfname

    show_html("head");
}

# show_help: show instructions
sub show_help {
    show_html("help");
}

# show_sq_form: show form for composing a simple query
sub show_sq_form {
    my @slist =   ('age', 'diag', 'info', 'med', 'record', 'sex', 'SPACER',
		   'SPNAME', 'signal', 'aname', 'antype', 'SPACER', 'SPNUM',
		   'SPANN', 'annm', 'annr', 'SPSIG',
		   'BP', 'CO', 'CO2', 'ECG', 'EEG', 'EMG', 'EOG', 'EP',
		   'Flow', 'HR', 'Noise', 'O2', 'PLETH',  'Pos', 'Resp',
		   'Sound', 'ST', 'Status', 'Stim', 'SV', 'Temp', 'Unknown');
    my %slabels = (age		=> 'Age (years)',
		   diag		=> 'Diagnosis',
		   info		=> 'Info (other metadata)',
		   med		=> 'Medication',
		   record	=> 'Record name',
		   sex		=> 'Sex (F, M, or ?)',
		   SPNAME	=> '--- Name required, enter below ---',
		   SPNUM        => '-- Number optional, enter below --',
		   SPANN	=> '....... Annotator classes ........',
		   SPSIG	=> '......... Signal classes .........',
		   SPACER	=> '__________________________________',
		   annm		=> '(#) Annotated by software',
		   annr		=> '(#) Annotated by human',
		   aname	=> 'Annotated by ... (name)',
		   antype	=> 'Annotation type ... (name)',
		   signal	=> 'Signal named ... (name)',
		   BP		=> '(#) Blood pressure',
		   CO		=> '(#) Cardiac output',
		   CO2		=> '(#) CO2',
		   ECG		=> '(#) ECG',
		   EEG		=> '(#) EEG',
		   EMG		=> '(#) EMG',
		   EOG		=> '(#) EOG',
		   EP		=> '(#) Evoked potential',
		   Flow		=> '(#) Flow',
		   HR		=> '(#) Heart rate',
		   Noise	=> '(#) Noise',
		   O2		=> '(#) O2',
		   PLETH	=> '(#) Photoplethysmogram',
		   Pos		=> '(#) Position',
		   Resp		=> '(#) Respiration',
		   Sound	=> '(#) Sound',
		   ST		=> '(#) ST level',
		   Status	=> '(#) Status',
		   Stim		=> '(#) Stimuli',
		   SV		=> '(#) Stroke volume',
		   Temp		=> '(#) Temperature',
		   Unknown	=> 'Unknown');

    my @rlist =   ('=', '<', '<=', '>=', '>', '~', '!~', '!=', '?');
    my $rtitle = "'~' means 'similar', '!~' means 'different', '?' means
 'is defined'.  If the subject element contains the (string) value, or is
 within 10% of the (numeric) value, it is similar.";

    my $vtitle = "Enter the number or character string to compare with the
 chosen subject elements.  Values containing ':' are durations (e.g., 1:30:0
 means 1 hour, 30 minutes).  Values ending with 'Hz' are sampling frequencies
 (e.g., 150Hz), and those containing 'adu/' are gains (e.g., 10 adu/mmHg).";

    my $gtitle = "Choose a Subject, Relationship, and Value at left, then click
 Get List to find PhysioBank records that satisfy the condition.  A link to the
 list will appear at the top of the section of results below.";

    my $ntitle = "If you have chosen from the menu above a subject marked with
 '(name)', enter the annotator name, annotation type, or signal name here.  If
 you have chosen a subject class marked with '#', you may enter a number here
 to restrict the search to records having at least that many members of the
 class.";

    print '<div style="margin: 0 -1em;">' .
	  start_form,
          '<center><table><tr><th>Subject</th><th>Relationship</th>' .
	    '<th>Value</th><td>&nbsp;</td></tr>',
	  '<tr><td>' .
	  popup_menu(-name=>'subject', -value=>[@slist], -labels=>{%slabels}) .
	  '</td><td>' .
	  popup_menu(-name=>'comp_op', -value=>[@rlist], -title=>$rtitle) .
	  '</td><td>' .
          textfield(-name=>'sval', -value=>$sval, -size=>25, -title=>$vtitle) .
	  '</td><td>' .
	  submit(-name=>'sq_action', value=>'Get List', -title=>$gtitle) .
	  '</td></tr>',
	  '<tr><td align="center"><small>Name/#:</small>' .
	  textfield(-name=>'name_num',
		    -value=>$name_num, -size=>15, -title=>$ntitle) .
          '&nbsp;</td><td></td><td><small>',
	   checkbox(-name=>'help_on', -label=>'Show/Hide Help',
		    -onclick=>'submit()') .
	  '</small></td></tr></table>' .
	  hidden(-name=>'id', -value=>$id);
    if (!param('id')) {
	print '<p>&nbsp;'.
	    submit(-name=>'res_action', -value=>'Restore previous session') .
	    submit(-name=>'sq_action', -value=>'Discard previous results');
    }
    print '</center>';
}

# show_results: show summary and links to results of previous queries
sub show_results {
    show_html("results");
    print '<div style="margin: -0.5em 1em 1em 1em;">';
    show_html_in_columns($qfname);
    print "</div>";
}
 
# footer:  finish the page
sub footer {
    show_html("footer");
    print end_html;
}

sub make_qrlist {
    if (open(QFILE, $qfname)) {
	$last = <QFILE>;
	@tokens = split(" ", $last);
	$n = tag2int($tokens[5]) + 1;
	int2tag($n);
	close(QFILE);
    }
    else { $tag = "A"; }
    $ofname = "$ofdir/$tag";
}

sub update_qindex {
    # prepend this set of results to the query results index
    rename $qfname, $tfname;
    $cksum = `cksum $ofdir/$tag | cut '-d ' -f1`;
    chomp($cksum);
    open QFILE, '>', $qfname or die $!;
    print QFILE "<br><input type=\"checkbox\" name=\"qrlist\" value=\"$tag\" />"
	. " $tag [$rescount] <a href=$ofurl/$tag?cksum=$cksum>$query</a>\n";
    # If a list is erased and then a new list with the same name is created,
    # the CRC checksum will be different (with very high probability, unless
    # the search produced the same results).  Including the CRC as extra
    # pathinfo, as above, ensures that if it has changed, the browser will
    # reread the new list rather than displaying the (stale) cached list.
    if (open(TFILE, $tfname)) {
	while (<TFILE>) {
	    print QFILE $_;
	}
	close TFILE;
    }
    close QFILE;
}

sub run_compound_query {
    $ofname = "/dev/null";
    if ($res_action eq 'And') {
	if (scalar(@qrlist) > 1) {
	    $command = $bindir . "pbs-and $qrl";
	    $query = join " &#8745; ", @qrlist;
	    make_qrlist($query);
	}
    }
    elsif ($res_action eq 'Or') {
	if (scalar(@qrlist) > 1) {
	    $command = $bindir . "pbs-or $qrl";
	    $query = join " &#8746; ", @qrlist;
	    make_qrlist($query);
	}
    }
    elsif ($res_action eq 'Not') {
	if (scalar(@qrlist) == 1) {
	    $command = $bindir . "pbs-not $rec_all $qrl";
	    $query = "&not; @qrlist[0]";
	    make_qrlist($query);
	}
    }
    # 'Choose' is handled at the top because redirect must precede other output
    elsif ($res_action eq 'Erase') {
	if (scalar(@qrlist) >= 1) {
	    $command = $bindir . "pbs-erase $qrl";
	}
    }

    if ($command) {
	# create the query results file
	open OFILE, '>', $ofname or die $!;
	open(CQ, "$command|");
	$rescount = 0;
	while (<CQ>) {
	    print OFILE $_;
	    $rescount++;
	}
	close CQ;
	close OFILE;
	if ($query) {
	    update_qindex();
	}
    }
}

sub run_simple_query {
    if ($subject eq 'aname') {
	if ("x$name_num" ne "x") { $sstring = "\@$name_num"; }
	else { return;}
    }
    elsif ($subject eq 'antype') {
	if ("x$name_num" ne "x") { $sstring = "/$name_num"; }
	else { return;}
    }
    elsif ($subject eq 'signal') {
	if ("x$name_num" ne "x") { $sstring = "\'$name_num\'"; }
	else { return;}
    }
    elsif ($subject =~ m/SP/) { return; }
    elsif ($name_num =~ /^[1-9][0-9]*$/) { $sstring = "$subject-$name_num"; }
    else { $sstring = $subject; }
    $foo = "x". "$sval";
    if ($foo eq 'x') { $comp_op = '?'; param(-name=>'comp_op', -value=>'?'); }
    if ($comp_op eq '?') { $query = "$sstring ?"; }
    elsif ($sval =~ m/ /) { $query = "$sstring $comp_op \"$sval\""; }
    else { $query = "$sstring $comp_op $sval"; }
    make_qrlist($query);
    $rescount = 0;

    # open a connection to pbsqs (port 9967)
    $socket = IO::Socket::INET->new(PeerAddr => 'localhost',
				    PeerPort => 9967,
				    Proto    => 'tcp',
				    Type     => SOCK_STREAM)
	or die "Couldn't connect to localhost:9967 : $@\n";

    # wait for the server's prompt
    do {
	$answer = <$socket>;
    } until ($answer =~ m/^pbs>/);

    # send the query
    print $socket $query . "\n";

    # create the query results file
    open OFILE, '>', $ofname or die $!;
    $answer = <$socket>;   # get the first line of the server's reply
    until ($answer =~ m/^pbs>/) {
	# copy the reply to the results file until it writes another prompt
	print OFILE $answer;
	$rescount++;
	$answer = <$socket>;
    }
    close OFILE;

    # finished, close the socket
    close $socket;

    # update the query index
    update_qindex();
}

# show_html: open a PhysioBank pbsearch HTML file and print it
sub show_html {
   show_file("$pbsdir/$_[0].html");
}

# show_html_in_columns: print a file in columns of up to 12 rows
sub show_html_in_columns {
    if (open(FILE, $_[0])) {
	print "<table><tr><td><div>";
	$row = 0;
	while (<FILE>) {
	    print $_;
	    if (++$row >= 12) {	print "</div></td><td><div>"; $row = 0; }
	}
	close(FILE);
	print "</div></td></tr></table>";
    }
}

# show_file: open a file and print it
sub show_file {
    if (open(FILE, $_[0])) {
	while (<FILE>) {
	    print $_;
	}
	close(FILE);
    }
}

# show_pre: open a file and print it with HTML escapes
sub show_pre {
    if (open(FILE, $_[0])) {
	print "<pre>";
	while (<FILE>) {
	    s/</&lt;/g;
	    s/>/&gt;/g;
	    print $_;
	}
	close(FILE);
	print "</pre>";
    }
}

# int2tag: convert an integer to a list tag.  Tags are bijective base-24
# numerals written using upper-case letters (excluding I and O) as digits:
#  A .. Z, AA .. AZ, BA .. BZ, ...
sub int2tag {
    $n = $_[0];
    $tag = '';
    do {
        $n--;
	$tag = $digits[$n % 24] . $tag;
	$n /= 24;
    } while ($n >= 1);
    $tag;
}

sub tag2int {
    my $tag = $_[0];
    my $n = 0;
    my @tdigits = split(//,$tag);
    foreach $digit (@tdigits) {
	$n *= 24;
	foreach my $i (0..23) {
	    if ($digit eq $digits[$i]) {
		$n += $i+1;
		break;
	    }
	}
    }
    $n;
}

sub set_pbs_id {
    require Digest::MD5;
    my $md5 = new Digest::MD5;
    my $id = $md5->md5_base64(time, $$);
    $id =~ tr|+/=|-_.|;   # Make non-word characters URL-friendly
    print '<meta http-equiv="Set-Cookie" content="' . 
           cookie(-name=>'pbs_id', -value=>"$id", -expires=>'+10d') . '">';
    $id;
}
    

sub refresh_cookie {
    print '<meta http-equiv="Set-Cookie" content="' . 
           cookie(-name=>'pbs_id', -value=>"$id", -expires=>'+10d') . '">';
    $id;
}
