#!/usr/local/bin/perl #/usr/local/bin/perl # # ice-form.pl -- cgi compliant ICE search interface // Oct 12 1995 # # (C) Christian Neuss 1995 (neuss@isa.informatik.th-darmstadt.de) #--- start of configuration --- put your changes here --- # Title or name of your server: # Example: local($title)="ICE Indexing Gateway"; local($title)="Priory Lodge Education Search Engine"; # search directories to present in the search dialogue # Example: # local(@directories)=( # "Image Communication Information Board (/icib)", # "WISE (/some/where/wise)" # ); # Original code commented out KHT #local(@directories)=( # "Image Communication Information Board (/icib)", # "WISE (/www/projects/wise)", # "Multimedia Survey (/www/projects/mms)", # "Department A2 (/www/igd-a2)", # "Department A8 (/www/igd-a8)", # "Department A9 (/www/igd-a9)", # "DZSIM (/www/projects/dzsim)", # "CSCW Laboratory (/www/projects/cscw-lab)", # "Software Catalog (/www/projects/sw-catalog)", # "WWW-Schulung (/www/igd-a3/schulung)", # "DZSIM (/www/projects/dzsim)", # "ZGDV User Interface GROUP (/www/zgdv-uig)" #); local(@directories)=(""); # Location of the indexfile: # Example: $indexfile="/usr/local/etc/httpd/index/index.idx"; $indexfile="../cgi-out/index.idx"; # Location of the thesaurus data file: # Example: $thesfile="/igd/a3/home1/neuss/Perl/thes.dat"; $thesfile="../cgi-out/thes.dat"; # URL Mappings (a.k.a Aliases) that your server does # map "/" to some path to reflect a "document root" # Example # %urltopath = ( # '/projects', '/usr/stud/proj', # '/people', '/usr3/webstuff/staff', # '/', '/usr3/webstuff/documents', # ); # #%urltopath = ('/journals', '/users/ad88',); %urltopath = ( '/', '/' ); #--- end of configuration --- don't change anything below --- # if this script is called up "by hand", run a test unless($ENV{"SCRIPT_NAME"}){ local($word) = ($#ARGV==-1) ? "the" : $ARGV[0] ; print "You have called the Priory forms interface manually.\n"; print "Optionally, provide search word as an argument.\n"; print "Test mode: search for \"$word\"\n"; print "--------\n"; $orig="$word @ /"; $foo=&getquery($orig); print $foo; exit; } # do the real work, but trap any errors eval '&main'; # if an error has occured, log it to stdout if($@){ &send_header("Error in Script"); # just in case print "$@\n
\n"; print ""; } # print the CGI script header sub send_header { local($title)=@_; print "Content-type: text/html\n\n"; print "
\n
\n\n"; print "
END
}
# parse data from CGI request and store it as name/value pairs
sub cgiparse {
if ($ENV{'REQUEST_METHOD'} eq "POST") {
read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
} else {
$buffer = $ENV{'QUERY_STRING'};
}
local(@query_strings) = split("&", $buffer);
foreach $q (@query_strings) {
$q =~ s/\+/ /g;
($attr, $val) = split("=", $q);
$val =~ s/%/\n%/g;
local($tmpval);
foreach (split("\n",$val)){
if(m:%(\w\w):){
local($binval) = hex($1);
if(($binval>0)&&($binval<256)){
local($htmlval) = pack("C",$binval);
s/%$1/$htmlval/;
}
}
$tmpval .= $_;
}
$forms{$attr} = $tmpval;
}
%forms;
}
sub main {
# if content_length is zero and query string is empty
if (($ENV{CONTENT_LENGTH}==0) &&
(length($ENV{"QUERY_STRING"})==0)){
# we're not decoding a form yet => send the form
&send_header("$title");
&send_index();
return;
}
&send_header("$title
Search Result");
# parse forms result and store it in an associative array
%forms=&cgiparse();
$pquery = $query = $forms{KEYWORDS};
$context = $forms{CONTEXT};
$context = $forms{CONTEXT};
if($context =~ m:\(([^)]*)\):) {
$context=$1;
}else{
$context="";
}
$thesaurus = $forms{THESAURUS};
$substring = $forms{SUBSTRING};
$days = $forms{DAYS};
if(length($days)>0){
$pquery.=" -D $days";
}
if(length($thesaurus)>0){
$pquery.=" -T";
}
if(length($substring)>0){
$pquery.=" -S";
}
if(length($context)>0){
$pquery.=" @ $context";
}
###print "query $pquery\n";
($err,$page) = &getquery($pquery);
if($err){
print "Query was: $query
\n";
print "Problem: $err\n";
print "";
return undef;
}
print "Preferences set for this query:\n";
print "
\n"; if($page){ print "
The index contains the following\n"; print "items relevant to the query\n"; print "$page\n"; }else{ print "
Nothing found.\n"; } print "\n"; } ########### begin of ice kernel functions, formerly ice.pl ########## sub getquery{ local($query)=@_; local($page)="
";
}else{
$page = "";
}
("",$page); # return;
}
# parse query
sub parsequery{
local($query)=@_;
local($context,$thesaurus,$substr);
# preprocess whitespace and discard spaces after @ and -D
$query =~ tr/ \t/ /s;
$query =~ s/@ /@/g;
$query =~ s/-D /-D/g;
$query =~ s/^-D/ -D/g;
$_=$query;
# "optional URL context as @-sign"
if(m:^([^@]*)\s+@(.*)$:){
$context=$2;
$_=$1;
} ### to be added: "IN"
while(m:\s+-[SDT]\d*$:){
# "turn on "global" thesaurus" by adding -T"
if(m:^(.*)\s+-T$:){
$thesaurus="y";
# print "turn on thesaurus\n";
$_=$1;
}
# "turn on matching substrings by adding -S"
if(m:^(.*)\s+-S$:){
$substr="y";
# print "turn on substring matching\n";
$_=$1;
}
# "turn on modified since n days" by adding -D"
if(m:^(.*)\s+-D(\d+)$:){
$days=$2;
# print "turn on modified since $days\n";
$_=$1;
}
}
# print "remaining query $_\n";
@list=split(/ /,$_);
$expectword="y" unless($days && $#list==-1);
foreach $w (@list){
$_ = $w;
tr/A-Z/a-z/;
if(/^and$/) {
if($expectword) {$err="$w"; last;}
$expectword="y";
$bool .= "&";
}elsif(/^or$/){
if($expectword) {$err="$w"; last;}
$expectword="y";
$bool .= "+";
}else{
### unless($expectword) {$err="$w"; last;}
# new: and is optional
unless($expectword) {
$bool .= "&";
}
$expectword="";
push(@querystring,$w);
}
}
if($expectword){
return ("syntax error in query: must end with keyword!");
}
if($err){
return ("syntax error in query near '$err'!");
}
#print"c=$context\nt=$thesaurus\nl=$levenshtein\n";
#print "b=$bool\nd=$days\nq=@querystring\n";
return("",$context,$thesaurus,$substr,$bool,$days,@querystring);
}
# get index entries matching query
sub getindex{
local($thes,$bool,$days,$substr,@query)=@_;
local(@list,$count,$item,$w,@wordnum,$grepexpr,$ret);
local($limit);
if($days){
$limit=time()-(60*60*24*$days) unless($days==0);
}
foreach $item (@query){
++$count;
local($w);
$_=$item;
local($flag)=$thes;
if (/{(.*)}/) {
$_ = $1;
$flag="y";
}
# convert e.g. "Picture" to "picture"
if(/^[A-Z][^A-Z]*$/){
tr/A-Z/a-z/;
}
# evaluate thesaurus
if ($flag) {
push (@keywords,$_);
$wordnum{$_}=$count;
local(@synonyms)=split(/\n/,&thesread($thesfile,$_));
foreach $w (@synonyms){
push (@keywords,$w);
$wordnum{$w}=$count;
}
} else {
$w=$_;
push (@keywords,$w);
$wordnum{$w}=$count;
}
}
$grepexpr="^@";
foreach (@keywords) { $grepexpr.="|$_"; }
local($timstr,$pat);
open(fpin,"<$indexfile") || die "$!";
while(