#!/usr/bin/perl
# rebuilds bayesian spam database
# (c) John Hanna 2003 under the terms of the GPL
# Updated July 2004 for simple proxy support.
# (c) Fritz Borgstedt 2006 under the terms of the GPL
# Updated Feb 2008 refactoring and rewrites
# (c) Kevin 2008 under the terms of the GPL
use bytes;    # get rid of anoying 'Malformed UTF-8' messages
use Digest::MD5 qw(md5_hex);
use English '-no_match_vars';
use File::Copy;
use IO::Handle;
use IO::Socket;
use Time::Local;
use Time::HiRes;
use Cwd;
use strict qw(vars subs);
our $AvailLWP  = eval('use LWP::Simple; 1');    # LWP::Simple module installed
our $CanUseLWP = $AvailLWP;
if ($CanUseLWP) {
#        my $ver        = eval('LWP::Simple->VERSION');
#        print "LWP::Simple $ver installed - download griplist available\n" ;
    } elsif ( !$AvailLWP ) {
        print "LWP::Simple module not installed - download griplist not available\n";
    }
#use warnings;
our $VERSION = "2.8.5.2";
our $modversion = '(1.0.00)'; 


#no output buffering to screen
*STDOUT->autoflush();

#holy predeclarations Batman!
use vars qw(
    $autoCorrectCorpus $base $DropList $correctednotspam $correctednotspamcount $correctedspam
    $correctedspamcount $discarded $DoDropList $DoNotCollectRed $EmailAdrRe $EmailDomainRe $EmailFrom $EmailAdminReportsTo $griplist $HamWordCount
    $KeepWhitelistedSpam $lowernorm $Log $LogDateFormat $maillogExt $MaxBytes $MaxCorrectedDays $MaxBayesFileAge $MaxNoBayesFileAge  $MaintBayesCollection $MaxFiles $MaxKeepDeleted $MaxWhitelistDays
    $MaxWhitelistLength  $maintbayescollection $minimumfiles $minimumdays $mydb $myhost $mypassword $myuser $myName $notspamlog $processTime
    $notspamlogcount $npRe $incomingOkMail $OrderedTieHashSize $pbdbfile $proxyserver $noGriplist		
    $RebuildLog $rebuildrun $redlistdb $redRe $redReRE $resendmail $setFilePermOnStart $silent $spamdb $spamdbFile $RegExLength
    $spam $spamdbFname $spamlog $spamlogcount $SpamWordCount $starttime
    $usesubject $WhitelistCleanFreq $whitelistdb $WhitelistObject $RebuildNotify $RedlistObject $viruslog $whiteRe $whiteReRE $wildcardUser
    %HamHash %Helo %Redlist %spam %SpamHash %Whitelist $asspLog $DoNotCollectRedList $DoNotCollectRedRe
    $DoFullGripDownload $UseLocalTime $uppernorm $TrashObject %Trashlist
    $runAsUser $runAsGroup
);
# load from command line if specified
if($ARGV[0]) { 
 $base=$ARGV[0]; 
} else { 
 # the last one is the one used if all else fails 
 $base = cwd(); 
 unless (-e "$base/assp.cfg") { 
   foreach ('.','/usr/local/assp','/home/assp','/etc/assp','/usr/assp','/applications/assp','/assp','.') {
    if (-e "$_/assp.cfg") {
      $base=$_; 
      last ; 
    } 
   } 
 } 
 $base = cwd() if $base eq '.'; 
}
unless (chdir $base) {
print
"Usage:
  perl rebuildspamdb.pl  c:\\assp  -- runs the programm in basedirectory c:\\assp\n
";
 die "Abort: unable to change to basedirectory $base";
}
$silent = 1 if (lc $ARGV[1] =~ /silent/i ||  lc $ARGV[0] =~ /silent/i);

#load configuration options from assp.cfg file
&loadconfig();

fork() && exit;

# open log file
if ( -e "$rebuildrun.bak" ) {
    unlink("$rebuildrun.bak") or die "unable to remove file: $!";
}
if ( -e $rebuildrun ) {
    copy( $rebuildrun, "$rebuildrun.bak" ) or die "unable to copy file for: $!";
}
open( $RebuildLog, '>', "$rebuildrun" ) or die "unable to open file for logging: $!";

$starttime = time;
&printlog("\n");
for ( my $c = 10; $c >= 1; $c-- ) { &printlog(q{*}); }
my $savesilent=$silent;
$silent=0;
printlog (&timestring(time) . " RebuildSpamDB $VERSION $modversion is starting;\n") ;
$silent=$savesilent;
&printlog( "\nRunning in basedirectory '$base'\n");
#-- check if running as root 
&printlog( "Running as root!!\n") if $< == 0 && $^O ne "MSWin32";
 
#-- print username
&printlog( "Running as user '" . (getpwuid($<))[0] . "'\n") if $< != 0 && $^O ne "MSWin32";
 

#if ($silent && $< == 0 && $^O ne "MSWin32") {
#	my ( $uid, $gid ) = getUidGid( $runAsUser, $runAsGroup )
#      if ( $runAsUser || $runAsGroup );
#	switchUsers( $uid, $gid ) if ( $runAsUser || $runAsGroup );
#}

my $AvailTieRDBM  = eval "use Tie::RDBM; 1";    # Is the required module installed?
my $CanUseTieRDBM = $AvailTieRDBM;              # this looks wierd but it's the only way it works
undef $AvailTieRDBM;
$EmailAdrRe    = "[^()<>@,;:\\\"\\[\\]\000-\040]+";
$EmailDomainRe = '(?:\w[\w\.\-]*\.\w+|\[[\d\.]*\.\d+\])';

# set counts
$HamWordCount          = $SpamWordCount = $correctedspamcount = 0;
$correctednotspamcount = $spamlogcount  = $notspamlogcount    = 0;






&printlog("\n---ASSP Settings---\n");
if ($DoNotCollectRedList) {
    &printlog(
        "Do Not Collect Messages with RedListed address: Enabled\n**Messages with RedListed addresses will be removed from the corpus!**\n\n"
    );
}
if ($usesubject) {
    &printlog(
        "Use Subject as Maillog Names: Enabled\n\n");
}
else { &printlog("Use Subject as Maillog Names: Disabled\n"); }
&printlog("Maxbytes: $MaxBytes \n");
&printlog("Maxfiles: $MaxFiles \n");


#rebuild various cache files and lists
&repair();

# Let's clean the old entries

 &cleanTrashlist();
# Let's clean the non bayesian folder of old entries 
# Let's clean the bayesian folder of old entries  
# Let's clean the corrected spam/notspam folder of old entries
 &cleanUpCollection();


# name, contents, refrence to "compiled" object
#&compileregex( "whiteRe", $whiteRe, \$whiteReRE );
&compileregex( "redRe",   $redRe,   \$redReRE );

# redlist,whitelist
&createlistobjects();

# isspam?, path, filter, weight, processing sub
$correctedspamcount    = &processfolder( 1, $correctedspam,    "*",      2, \&dospamhash );
$correctednotspamcount = &processfolder( 0, $correctednotspam, "*",      4, \&dohamhash );
$spamlogcount          = &processfolder( 1, $spamlog,          "*", 1, \&checkspam );
$notspamlogcount       = &processfolder( 0, $notspamlog,       "*", 1, \&checkham );
our $norm = $HamWordCount ? ( $SpamWordCount / $HamWordCount ) : 1;
open( my $normFile, '>', "$base/normfile" ) || warn "unable to open $base/normfile: $!\n";
if ($normFile) {
    print { $normFile } "$norm $correctedspamcount $correctednotspamcount $spamlogcount $notspamlogcount";
    close $normFile;
}

# Create Bayesian DB
&generatescores();

# Create HELo blacklist
&createheloblacklist();
&printlog(
    "\nSpam Weight:\t   " . commify($SpamWordCount) . "\nNot-Spam Weight:   " . commify($HamWordCount) . "\n\n" );
if ( !($norm) ) {    #invalid norm
    &printlog("Warning: Corpus insufficent to calculate normality!\n");
}
else {               #norm exists, print it


        my $normdesc = '';
        if    ( $norm < 0.5 ) { $normdesc = '(warning: ham heavy)'; }
        elsif ( $norm < 0.7   ) { $normdesc = '(ok - slightly ham heavy)'; }
        elsif ( $norm < 1.2   ) { $normdesc = '(ok - balanced)'; } 
        elsif ( $norm < 1.5 ) { $normdesc = '(ok - slightly spam heavy)'; }
        else                  { $normdesc = '(warning: spam heavy)'; }
        &printlog( "Corpus norm:\t%.4f %s $normdesc \n", $norm  );

}
&printlog( "Corpus correction settings ( see autoCorrectCorpus in ASSP GUI) - low:$lowernorm high:$uppernorm minimum files:$minimumfiles minimum days:$minimumdays\n");
$lowernorm = 0.5 if $lowernorm && ($lowernorm > 1 or $lowernorm < 0.5);
if ( $lowernorm && $uppernorm ) {
 	if ( $lowernorm && $norm < $lowernorm ) {

        if ($autoCorrectCorpus && $notspamlog ) {
            my $info = &cleanUpMaxFiles($notspamlog, $lowernorm - $norm);


        }
	}
	$uppernorm = 1.5  if $uppernorm && ($uppernorm > 1.5 or $uppernorm < 1.0);
	if ( $norm > $uppernorm && $uppernorm > 1) {

        if ($autoCorrectCorpus && $spamlog) {
            my $info = &cleanUpMaxFiles($spamlog, $norm - $uppernorm);


        }
	}
}
if   ( time - $starttime != 0 ) { $processTime = time - $starttime; }
else                            { $processTime = 1; }
&printlog( "\nTotal processing time: %d second(s)\n\n", $processTime );


if ( !$noGriplist || !$asspLog ) { &uploadgriplist(); }
&downloadgriplist();
&downloaddroplist();

$savesilent=$silent;
$silent=0;
&printlog( "\n");
&printlog( &timestring(time) . " RebuildSpamDB $VERSION $modversion ended;\n");
$silent=$savesilent;
&printlog( "Sending Notify to $RebuildNotify\n") if $RebuildNotify;
&printlog( "Sending Notify not possible, address in RebuildNotify missing\n") if !$RebuildNotify;
close $RebuildLog;
if ($RebuildNotify) {
        &sendNotification(
          $EmailFrom,
          $RebuildNotify,
          'RebuildSpamDB - report',
          "File rebuildrun.txt follows:\r\n\r\n",
          "$base/rebuildrun.txt");
    }


##########################################
#           script ends here
##########################################
sub createlistobjects {

    if ( $CanUseTieRDBM && $whitelistdb =~ /mysql/ && !$KeepWhitelistedSpam ) {
        eval {
            $WhitelistObject = tie %Whitelist, 'Tie::RDBM', "dbi:mysql:database=$mydb;host=$myhost",
                { user => "$myuser", password => "$mypassword", table => 'whitelist', create => 0 };
        };
        if ($EVAL_ERROR) {
            &printlog("whitelist mysql error: $@");
            $CanUseTieRDBM = 0;
            $whitelistdb   = "whitelist";
        }
    }
    elsif ( !$KeepWhitelistedSpam ) {
        if ( -e $whitelistdb ) { $WhitelistObject = tie( %Whitelist, 'orderedtie', "$whitelistdb" ); }
    }
    if ( $CanUseTieRDBM && $redlistdb =~ /mysql/ && ( $DoNotCollectRed || $DoNotCollectRedList ) ) {
        eval {
            $RedlistObject = tie %Redlist, 'Tie::RDBM', "dbi:mysql:database=$mydb;host=$myhost",
                { user => "$myuser", password => "$mypassword", table => 'redlist', create => 0 };
        };
        if ($EVAL_ERROR) {
            &printlog("redlist mysql error: $@");
            $CanUseTieRDBM = 0;
            $redlistdb     = "redlist";
        }
    }
    elsif ($DoNotCollectRed) {
        if ( -e $redlistdb ) { $RedlistObject = tie( %Redlist, 'orderedtie', "$redlistdb" ); }
    }
    return;
} ## end sub createlistobjects

sub generatescores {
    my ( $t, $s, @result, $pair, $v );
    &printlog("\nGenerating weighted Bayesian tuplets...");
    open( $spamdbFile, '>', "$spamdb.tmp" ) || die "unable to open $spamdb.tmp: $!\n";
    binmode $spamdbFile;
    print { $spamdbFile } "\n";
    while ( ( $pair, $v ) = each(%spam) ) {
        ( $s, $t ) = split( q{ }, $v );
        $t = ( $t - $s ) * $norm + $s;    # normalize t
        if ( $t < 5 ) {

            #$unknowns+=$s; $unknownt+=$t;
            next;
        }

        # if token represents all spam or all ham then square its value
        if ( $s == $t || $s == 0 ) {
            $s = $s * $s;
            $t = $t * $t;
        }
        $v = ( 1 + $s ) / ( $t + 2 );
        $v = sprintf( "%.7f", $v );
        $v = '0.9999999' if $v >= 1;
        $v = '0.0000001' if $v <= 0;
        push( @result, "$pair\002$v\n" ) if abs( $v - .5 ) > .09;
    }
    &printlog("done\n");
    undef %spam;    # free some memory
    &printlog("\nSaving rebuilt SPAM database...");
    for ( sort @result ) { print { $spamdbFile } $_; }
    close $spamdbFile;
    if ( -e "$spamdb.bak" ) { unlink("$spamdb.bak") || &printlog("unable to remove '$spamdb.bak' $!\n") }
    if ( -e $spamdb ) {
        rename( $spamdb, "$spamdb.bak" ) || &printlog("unable to rename '$spamdb' to '$spamdb.bak' $!\n");
    }
    rename( "$spamdb.tmp", $spamdb ) || &printlog("unable to rename '$spamdb.tmp' to '$spamdb' $!\n");
    &printlog("done\n");
    my $filesize = -s "$spamdb";
    &printlog( "\nResulting file '$spamdbFname' is " . commify($filesize) . " bytes\n" );
    my $pairs = scalar @result;
    &printlog( "Bayesian Pairs: " . commify($pairs) . "\n" );
    return;
} ## end sub generatescores

sub createheloblacklist {
    my (@Helo);
    open( my $FheloBlack, '>', "$spamdb.helo.tmp" ) || &printlog("unable to open '$spamdb.helo.tmp' $!\n");
    binmode $FheloBlack;
    print { $FheloBlack } "\n";
    while ( my ( $helostr, $weights ) = each(%Helo) ) {
        $weights->[1] = 0 if ( !defined $weights->[1] );
        $weights->[0] = 0 if ( !defined $weights->[0] );
        if ( $weights->[1] / ( $weights->[0] + $weights->[1] + .1 ) > .98 ) { push( @Helo, "$helostr\0021\n" ); }
    }
    print { $FheloBlack } sort @Helo;
    close $FheloBlack;
    &printlog( "\nHELO Blacklist: " . scalar(@Helo) . " HELOs\n" );
    if ( -e "$spamdb.helo.bak" ) {
        unlink("$spamdb.helo.bak") || &printlog("unable to remove '$spamdb.helo.bak' $!\n");
    }
    if ( -e "$spamdb.helo" ) {
        rename( "$spamdb.helo", "$spamdb.helo.bak" )
            || &printlog("unable to rename '$spamdb.helo' to '$spamdb.helo.bak' $!\n");
    }
    rename( "$spamdb.helo.tmp", "$spamdb.helo" )
        || &printlog("unable to rename '$spamdb.helo.tmp' to '$spamdb.helo' $!\n");
    return;
}

sub loadconfig {
    open( my $confFile, '<', "$base/assp.cfg" ) || die "cannot open \"$base/assp.cfg\": $!";
    local $/;
    my %Config = split( /:=|\n/, <$confFile> );

    close $confFile or die "unable to close: $!";
    $TrashObject       = tie %Trashlist,   'orderedtie', "$base/trashlist.db";
    
    $DoDropList     	 = $Config{ DoDropList };
    $runAsUser			 = $Config{ runAsUser };
    $runAsGroup			 = $Config{ runAsGroup };
    $correctednotspam    = $Config{ correctednotspam } && "$Config{base}/$Config{correctednotspam}" || 'errors/notspam';
    $correctedspam       = $Config{ correctedspam } && "$Config{base}/$Config{correctedspam}" || 'errors/spam';
	$incomingOkMail      		 = $Config{ incomingOkMail } && "$Config{base}/$Config{incomingOkMail}" || 'okmail';
    $DoNotCollectRed     = $Config{ DoNotCollectRed };
    $DoNotCollectRedRe   = $Config{ DoNotCollectRedRe };
    $DoNotCollectRedList = $Config{ DoNotCollectRedList };
    $KeepWhitelistedSpam = $Config{ KeepWhitelistedSpam };
    $Log                 = $Config{ logfile } && "$Config{base}/$Config{logfile}" || 'maillog.txt';
    $maillogExt          = $Config{ maillogExt };
    $MaxBytes            = $Config{ MaxBytes };
    $MaxFiles            = $Config{ MaxFiles };
    $MaxWhitelistDays    = $Config{ MaxWhitelistDays } || 90;
    $MaxCorrectedDays    		= $Config{ MaxCorrectedDays } || 1000;
    $MaxNoBayesFileAge	 		= $Config{ MaxNoBayesFileAge } || 30; 
    $MaxBayesFileAge	 		= $Config{ MaxBayesFileAge };
    $MaintBayesCollection	 	= $Config{ MaintBayesCollection } || 1;
    $maintbayescollection		= $MaintBayesCollection;
    $MaxWhitelistLength  	= $Config{ MaxWhitelistLength } || 60;
    $MaxKeepDeleted  		= $Config{ MaxKeepDeleted } || 0;
    $notspamlog          	= $Config{ notspamlog } && "$Config{base}/$Config{notspamlog}" || 'notspam';
    $npRe                = $Config{ npRe };
    $OrderedTieHashSize  = $Config{ OrderedTieHashSize } || 10_000;
    $pbdbfile            = $Config{ pbdb };
    
    $proxyserver         = $Config{ proxyserver };
    $resendmail			 = $Config{ resendmail };
    $redlistdb           = $Config{ redlistdb } && "$Config{base}/$Config{redlistdb}" || 'redlist';
    $redRe               = $Config{ redRe };
	$myName				 = $Config{ myName };
    $setFilePermOnStart	 = $Config{ setFilePermOnStart };
    $spamdb              = $Config{ spamdb } && "$Config{base}/$Config{spamdb}" || 'spamdb';
    $spamdbFname         = $Config{ spamdb } || 'spamdb';
    $spamlog             = $Config{ spamlog } && "$Config{base}/$Config{spamlog}" || 'spam';
    $discarded           = $Config{ discarded } && "$Config{base}/$Config{discarded}" || 'discarded';
	$viruslog            = $Config{ viruslog } && "$Config{base}/$Config{viruslog}" || 'viruslog';
    $usesubject          = $Config{ UseSubjectsAsMaillogNames };
    $whitelistdb         = $Config{ whitelistdb } && "$Config{base}/$Config{whitelistdb}" || 'whitelist';
	$griplist			 = $Config{ griplist };
	$DropList            = $Config{ DropList } || 'file:files/droplist.txt';
	($DropList) 		 = $DropList =~ /^ *file: *(.+)/i if $DropList =~ /^ *file:/;
    $noGriplist    		 = $Config{ noGriplist };

    $asspLog             = $Config{ asspLog };
    $whiteRe             = $Config{ whiteRe };
    $wildcardUser        = $Config{ wildcardUser };
    $mydb                = $Config{ mydb };
    $myhost              = $Config{ myhost };
    $myuser              = $Config{ myuser };
    $mypassword          = $Config{ mypassword };
    $rebuildrun          = &fixPath($base) . "/rebuildrun.txt";
    $EmailAdminReportsTo = $Config{ EmailAdminReportsTo };
    $RebuildNotify		 = $Config{ RebuildNotify };


    $autoCorrectCorpus	 = $Config{ autoCorrectCorpus } || "0.5-1.5-10000-14";
   ($lowernorm,$uppernorm,$minimumfiles,$minimumdays) = split(/-/, $autoCorrectCorpus);

	$minimumfiles = 10000 if !$minimumfiles;
	$minimumfiles = 5000 if  $minimumfiles < 5000;
	$minimumdays = 14 if !$minimumdays;
	$minimumdays = 7 if  $minimumdays < 7;
	
    $EmailFrom		 	 = $Config{ EmailFrom };
    $RegExLength         = $Config{ RegExLength };
    $UseLocalTime        = $Config{ UseLocalTime };
    $LogDateFormat		 = $Config{ LogDateFormat } || "MMM-DD-YY hh:mm:ss";
	
    $DoFullGripDownload  = $Config{ DoFullGripDownload };

    return;
} ## end sub loadconfig

sub processfolder {
    my ( $fldrType, $fldrpath, $filter, $weight, $sub ) = @_;
    my ( $count, $processFolderTime, $folderStartTime, $fileCount, @files );
    our ( $WhiteCount, $RedCount );
    $folderStartTime = time;
    $fldrpath        = &fixPath($fldrpath);
    &printlog( "\n" . $fldrpath . "\n" );
    $fldrpath .= "/*";
    $fileCount = &countfiles($fldrpath);
    &printlog( "File Count:\t" . commify($fileCount) );
    &printlog("\nProcessing...");
    $count = $WhiteCount = 0;
    @files = glob($fldrpath);

    #while( glob($fldrpath) && $count <= $MaxFiles ) {
    foreach my $file (@files) {
    	
        &add( $fldrType, $file, $weight, $sub );
        $count++;
        last if $count >= $MaxFiles;    #too many files
    }
    if   ( time - $folderStartTime != 0 ) { $processFolderTime = time - $folderStartTime; }
    else                                  { $processFolderTime = 1; }
    $count = $count - $WhiteCount ;


    if ($WhiteCount) {
        &printlog( "\nRemoved White:\t" . commify($WhiteCount) );
    }

    &printlog( "\nImported Files:\t" . commify($count) );

    if ( $count  > $MaxFiles ) {
        $maintbayescollection = 1;
    }

    #&printlog( "\n " . commify($SpamWordCount) . " spam weight \n " . commify($HamWordCount) . " non-spam weight." );
    &printlog("\nFinished in $processFolderTime second(s)\n");
    
    return $count;
} ## end sub processfolder

sub countfiles {
    my ($fldrpath) = @_;
    my @fileCount = glob("$fldrpath");
    return scalar(@fileCount);
}

sub commify {
    local $_ = shift;
    1 while s/^([-+]?\d+)(\d{3})/$1,$2/;
    return $_;
}

sub hash {
    my ($msgText) = @_;

    my ( $head, $body );

    # creates a md5 hash of $msg body
    if ( $msgText =~ /^(.*?)\n\r?\n(.*)/s ) {

        $head = $1;
        $body = $2;

        return md5_hex($body);
    }
    else {

        #return q;
        #There is no split, the message has no valid body
        return md5_hex($msgText);
    }

    #return $value;
    return;
}

sub dospamhash {
    my ( $FileName, $msgText ) = @_;
    $SpamHash{ &hash($msgText) } = '1';
    return;
}

sub dohamhash {
    my ( $FileName, $msgText ) = @_;
    $HamHash{ &hash($msgText) } = q{};
    return;
}

sub checkspam {
    my ( $FileName, $msgText ) = @_;
    our $HamHash;
    $msgText = &hash($msgText);
    my ( $return, $reason );
    if ( defined( $HamHash{ $msgText } ) ) {

        # we've found a message in the spam database that is the same as one in the corrected Ham group
        my $fn = shift;
        &deletefile( $fn, "found in $correctednotspam" );
        return 1;
    } elsif ( $reason = &redlisted( $_[1] ) ) {
        my $fn = shift;
        &deletefile( $fn, $reason );
        return 1;

    } elsif ( $reason = &whitelisted( $_[1] ) ) {
        my $fn = shift;
        &deletefile( $fn, $reason );
        return 1;
    }
    return 0;
}

sub checkham {
    my ( $FileName, $msgText ) = @_;
    our $SpamHash;
    my ( $return, $reason );
    $msgText = &hash($msgText);
    if ( defined( $SpamHash{ $msgText } ) ) {

        # we've found a message in the ham database that is the same as one in the corrected spam group
        my $fn = shift;
        &deletefile( $fn, "found in $correctedspam" );
        return 1;

    }
    return 0;
}

sub getrecontent {
    my ( $value, $name ) = @_;
    my $fromfile = 0;
    if ( $value =~ /^ *file: *(.+)/i ) {

        # the option list is actually saved in a file.
        $fromfile = 1;
        my $fil = $1;
        $fil = "$base/$fil" if $fil !~ /^\Q$base\E/i;
        local $/;
        if ( open( my $File, '<', $fil ) ) {
            $value = <$File>;

            # clean off comments
            $value =~ s/#.*//g;

            # replace newlines (and the whitespace that surrounds them) with a |(pipe character)
            $value =~ s/\s*\n\s*/|/g;
            close $File;
        }
        else { $value = q{}; }
    }
    $value =~ s/\|\|/\|/g;
    $value =~ s/\s*\|/\|/g;
    $value =~ s/\|\s*/\|/g;
    $value =~ s/\|\|+/\|/g;
    $value =~ s/^\s*\|?//;
    $value =~ s/\|?\s*$//;
    $value =~ s/\|$//;
    return $value;
} ## end sub getrecontent

sub batv_remove_tag {
    my $mailfrom = shift;
    
    if ($mailfrom =~ /^(prvs=.*=)(.*)/o) {
	
        $mailfrom = lc $2;
       
    }
    return $mailfrom;
}

sub whitelisted {
    return 0 if $KeepWhitelistedSpam;
    my $m = shift;
    my ( $curaddr, %seen );


    $m =~ s/\n\r?\n.*//s;    # remove body
    while ( $m =~ /($EmailAdrRe\@$EmailDomainRe)/igo ) {
        my $curaddr = lc($1);    #
        $curaddr = batv_remove_tag($curaddr);
		
        #my $curaddr = lc( $1 . $2 );
        if ( exists $seen{ $curaddr } ) {
            next;                #we already checked this address
        }
        else { $seen{ $curaddr } = 1; }
        if ( $Whitelist{ $curaddr } ) {
            my $reason = $curaddr;
            $reason =~ s/\s+$/ /g;
            $reason =~ s/[\r\n]/ /g;
            our $WhiteCount++;
            return ( " -- '$reason' is in Whitelist");
        }
        if ($wildcardUser) {
            my ( $mfdd, $alldd, $reason );
            $mfdd = $1 if $curaddr =~ /(\@.*)/;
            $alldd = "$wildcardUser$mfdd";
            if ( $Whitelist{ lc $alldd } ) {
                $reason = $curaddr;
                $reason =~ s/\s+$/ /g;
                $reason =~ s/[\r\n]/ /g;
                our $WhiteCount++;
                return ( " -- '$reason' is in Whitelist ($wildcardUser)");
            }
        }
    } ## end while ( $m =~ /($EmailAdrRe\@$EmailDomainRe)/igo)
    return 0;
} ## end sub whitelisted

sub redlisted {
    my $m = shift;
    my (%seen);
	my $isasspheader;

    if ( $DoNotCollectRedList) {    
        $m =~ s/\n\r?\n.*//s;                            # remove body
        while ( $m =~ /($EmailAdrRe\@$EmailDomainRe)/igo ) {
            my $curaddr = lc($1);

            #$curaddr = lc( $1 . $2 );
            if ( exists $seen{ $curaddr } ) {
                next;                                    #we already checked this address
            }
            else { $seen{ $curaddr } = 1; }
            if ( $Redlist{ $curaddr } ) {
                my $reason = $curaddr;
                $reason =~ s/\s+$/ /g;
                $reason =~ s/[\r\n]/ /g;
                our $RedCount++;
                return ( " -- '$reason' is in Redlist");
            }
        }
    }
    return 0;
} ## end sub redlisted

sub deletefile {
    my ( $fn, $reason, $nolog ) = @_;

	if ( -e $fn ) {
        	if ( -w $fn || -W $fn ) {
            	&printlog( "\nremoving " . $fn . q{ } . $reason );
            	if ($MaxKeepDeleted) {
    		$Trashlist{$fn}=time;
    			} else {
            	unlink($fn);
            	}
        	} else { 
        		printlog( "\ncannot delete " . $reason . " message " . $fn . ": file is not writable: $!" ) ; 
			}
	} else { 
    		printlog( "\ncannot delete " . $reason . " message " . $fn . ": $!" ) if !$nolog; 
    }
    
 
}

sub getfile {
    my ( $fn, $sub ) = @_;
    my $message;
    my $count;
    my $numreadchars;
    return if exists $Trashlist{$fn};
    open( my $file, '<', "$fn" ) || return;
	
#	my $dtime=(stat($fn))[9]-time;
	
#    return if $dtime > 0;
    # Maxbytes or 10000, whichever is less
    $numreadchars = $MaxBytes <= 10_000 ? $MaxBytes : 10_000;
    $count = read( $file, $message, $numreadchars );    # read characters into memory
    close $file;
    return if $sub->( $fn, $message );                  # have i read this before?

    return $message;
}

sub add {
    my ( $isspam, $fn, $factor, $sub ) = @_;
    return if -d $fn;
    my ( $curHelo, $CurWord, $PrevWord, $sfac, $tfac );
    $PrevWord = $CurWord = q{};
    my $content = &getfile( $fn, $sub );
    return unless $content;
    if ( $content =~ /helo=(.*?)\)/i ) {
        $curHelo = lc($1);
        if ( $Helo{ $curHelo } ) { $Helo{ $curHelo }->[$isspam] += $factor; }
        else {    #it doesn't seem to exist. create it.
            $Helo{ $curHelo }->[$isspam] = $factor;
        }
    }
    $content = &clean($content);
    while ( $content =~ /([-\$A-Za-z0-9\'\.!\240-\377]{2,})/g ) {
        if ( length($1) > 20 || length($1) < 2 ) { next }
        $PrevWord = $CurWord;
        $CurWord  = lc($1);

        #next if $text=~/^\d/;      # ignore numbers
        $CurWord =~ s/[,.']+$//;    # remove commas and periods at the end of strings
        $CurWord =~ s/!!!+/!!/g;    # replace excessive exclamation points
        $CurWord =~ s/--+/-/g;      # replace excessive dashes
        if ( !$PrevWord ) { next }  # We only want word pairs
        if ( length($CurWord) < 2 || length($PrevWord) < 2 ) { next }    # too short after cleaning

        # increment global weights, they are not really word counts
        if   ($isspam) { $SpamWordCount += $factor; }
        else           { $HamWordCount  += $factor; }
        if ( exists( $spam{ "$PrevWord $CurWord" } ) ) {
            ( $sfac, $tfac ) = split( q{ }, $spam{ "$PrevWord $CurWord" } );
        }
        else {

            # the pair does not exist, create it
            $spam{ "$PrevWord $CurWord" } = "0 0";
            ( $sfac, $tfac ) = split( q{ }, $spam{ "$PrevWord $CurWord" } );
        }
        $sfac += $isspam ? $factor : 0;
        $tfac += $factor;
        $spam{ "$PrevWord $CurWord" } = "$sfac $tfac";
    } ## end while ( $content =~ /([-\$A-Za-z0-9\'\.!\240-\377]{2,})/g)
    return;
} ## end sub add

# clean up source email
sub clean {
    local $_ = "\n" . shift;
    my $helo;
    if ( $helo = /helo=([^)]+)\)/i ) {

        # if the helo string is long, break it up
        if ( length($helo) > 19 ) { $helo =~ s/(\w+)/ hlo $1 /g }
    }
    else { $helo = q{}; }
    my $rcpt = "rcpt " . join( " rcpt ", /($EmailAdrRe\@$EmailDomainRe)/go );

    # replace &#ddd encoding
    s/&#(\d{1,3});?/chr($1)/ge;

    #s/base64.{0,99}\n\n([a-zA-Z0-9+\/\n=]+)/base64decode($1)/gse;
    # replace base64 encoding
    s/\n([a-zA-Z0-9+\/=]{40,}\r?\n[a-zA-Z0-9+\/=\r\n]+)/&base64decode($1)/gse;

    # clean up quoted-printable references
    s/(Subject: .*)=\r?\n/$1\n/;

    #if(/quoted-printable/) {
    s/=\r?\n//g;
    s/=([0-9a-fA-F]{2})/pack("C",hex($1))/gei;

    #}
    #s/(http:\/\/\S+)/&fixurl($1)/ige;
    s/%([0-9a-fA-F][0-9a-fA-F])/pack('C',hex($1))/ge;    # replace url encoding

    # strip out mime continuation
    s/.*---=_NextPart_.*\n//g;

    # mark the subject
    s/\nsubject: (.*)/&fixsub($1)/ige;

    # remove received lines
    s/\n(received|Content-Type): .*(\n[\t ].*)*//ig;

    # remove other header lines
    s/(\n[a-zA-Z\-]{2,40}: .*(\n[\t ].*)*){2,}//g;

    # clean up &nbsp; and &amp;
    s/&nbsp;?/ /gi;
    s/&amp;?/and/gi;
    s/(\d),(\d)/$1$2/g;
    s/\r//g;
    s/ *\n/\n/g;
    s/\n\n\n\n\n+/\nblines blines\n/g;

    # clean up html stuff
    s/<script.*?>\s*(<!\S*)?/ jscripttag jscripttag /ig;
    while (s/(\w+)(<[^>]*>)((<[^>]*>)*\w+)/$2$1$3/g) { }    # move html out of words
    s/<([biu]|strong)>/ boldifytext boldifytext /gi;

    # remove some tags that are not informative
    s/<\/?(p|br|div|t[dr])[^>]*>/\n/gi;
    s/<\/([biu]|font|strong)>//gi;
    s/<\/?(html|meta|head|body|span|o)[^>]*>//ig;
    s/(<a\s[^>]*>)(.*?)(<\s*\/a\s*>)/$1.fixlinktext($2).$3/igse;
    s/<\s*\/a\s*>//gi;

    # treat titles like subjects
    s/<title[^>]*>(.*?)<\/title>/&fixsub($1)/ige;

    # remove style sheets
    s/<style[^>]*>.*?<\/style>//igs;

    # remove html comments
    s/<!.*?-->//gs;
    s/<![^>]*>//g;

    # look for random words
    s/[ a-z0-9][ghjklmnpqrstvwxz_]{2}[bcdfghjklmnpqrstvwxz_0-9]{3}\S*/ randword randword /gi;

    # remove mime seperators
    s/\n--.*randword.*//g;

    # look for linked images
    s/(<a[^>]*>[^<]*<img)/ linkedimage linkedimage $1/gis;
    s/<[^>]*href\s*=\s*("[^"]*"|\S*)/&fixhref($1)/isge;
    s/http:\/\/(\S*)/&fixhref($1)/isge;
    s/(\S+\@\S*\.\w{2,3})\b/&fixhref($1)/ge;

    #clean MSHTML shit
    s/=3D/=/gs;
    s/=20\n//gs;
    s/src=\"cid\:[\w\W]+?\"//gs;
    return "helo: $helo\n$rcpt\n$_";
} ## end sub clean

sub cleanwhite {
    &printlog("\n---Cleaning whitelist ($whitelistdb)---\n");
   
    &printlog( "whitelist entries older than " . $MaxWhitelistDays . " days (MaxWhitelistDays) will be removed\n" );
    my $calcTime = time - 24 * 3600 * $MaxWhitelistDays;
    
    my $wlbefore = 0;
    my $wlafter = 0;
    if ( !( $whitelistdb =~ /mysql/ ) ) {
        if ( open( F, "<", "$whitelistdb" ) ) {
            binmode(F);
	    $_ = <F>; # ignore blank line at start of file
            my $nwhite;
            local $/ = "\n";
            $nwhite = "\n";
            while (<F>) {
                chomp;
                $wlbefore++;
                my ( $adr, $time ) = split( "\002", $_ );
                next if ( !$time || !$adr );
                $adr =~ s/^\'//g;
                $adr =~ s/^\"//g;
                $adr = batv_remove_tag($adr);
                next if ($adr =~ m/^'/);    #skip addresses with leading ' chars
                next if $calcTime > $time || length($adr) > $MaxWhitelistLength;
                $nwhite .= "$adr\002$time\n";
                $wlafter++;
            }
            close F;

                unlink "$whitelistdb.bak";
                rename( $whitelistdb, "$whitelistdb.bak" );
                open( O, ">", "$whitelistdb" );
                binmode(O);
                print O $nwhite;
                close O;           
          
        }
    } ## end if ( !( $whitelistdb =~...
    else {
        my %Whitelist;
        my $WhitelistObject;
        eval {
            $WhitelistObject = tie %Whitelist, 'Tie::RDBM', "dbi:mysql:database=$mydb;host=$myhost",
                { user => "$myuser", password => "$mypassword", table => 'whitelist', create => 0 };
        };
        if ($EVAL_ERROR) {
            &printlog("whitelist mysql error: $@");
            $CanUseTieRDBM = 0;
            $whitelistdb   = "whitelist";
        }
        $wlbefore = scalar keys %Whitelist;
        $wlafter  = $wlbefore;
        while ( my ( $key, $value ) = each %Whitelist ) {

            #my $date1 = localtime($value); #debugging stuff
            #my $date2 = localtime($calcTime);
            #print "$key=$value\n";
            if ( $value < $calcTime || length($key) > $MaxWhitelistLength ) {
                if ( $Whitelist{ $key } ) {
                    delete $Whitelist{ $key };
                    $wlafter--;
                }
            }
        }
        $WhitelistObject->flush() if $WhitelistObject && $whitelistdb !~ /mysql/;

        #untie %Whitelist;
    } ## end else [ if ( !( $whitelistdb =~...
    &printlog( "whitelist before: " . commify($wlbefore) . "\n" );
    &printlog( "whitelist after:  " . commify($wlafter) . "\n" );
    return;
} ## end sub cleanwhite

sub dayofweek {

    # this is mercilessly hacked from John Von Essen's Date::Day
    my ( $d, $m, $y ) = $_[0] =~ /(\S+) +(\S+) +(\S+)/;

    # data for DayOfWeek function
    my %Months = (
        'Jan', 1, 'Feb', 2, 'Mar', 3, 'Apr', 4,  'May', 5,  'Jun', 6,
        'Jul', 7, 'Aug', 8, 'Sep', 9, 'Oct', 10, 'Nov', 11, 'Dec', 12,
    );
    my %Month = ( 1, 0, 2, 3, 3, 2, 4, 5, 5, 0, 6, 3, 7, 5, 8, 1, 9, 4, 10, 6, 11, 2, 12, 4, );
    my %Weekday = ( 0, 'srdSUN', 1, 'srdMON', 2, 'srdTUE', 3, 'srdWED', 4, 'srdTHU', 5, 'srdFRI', 6, 'srdSAT', );
    $y += 2000;
    $m = $Months{ $m };
    if ( $m <= 2 ) { $y--; }
    my $wday = ( ( $d + $Month{ $m } + $y + ( int( $y / 4 ) ) - ( int( $y / 100 ) ) + ( int( $y / 400 ) ) ) % 7 );
    return $Weekday{ $wday };
}
sub fixhref     { my $t = shift; $t =~ s/(\w+)/ href $1 /g; return $t; }
sub fixlinktext { my $t = shift; $t =~ s/(\w+)/atxt $1/g;   return $t; }

sub fixurl {
    my $a = shift;
    $a =~ s/%([0-9a-fA-F][0-9a-fA-F])/pack('C',hex($1))/ge;
    return $a;
}

sub fixsub {
    my $s = shift;

    #print "$s=>";
    $s =~ s/ {3,}/ lotsaspaces /g;
    $s =~ s/(\S+)/ssub $1/g;

    #print "$s\n";
    return "\n$s ssub";
}

sub base64decode {
    my $str = shift;
    my $res = "\n\n";
    $str =~ tr|A-Za-z0-9+/||cd;
    $str =~ tr|A-Za-z0-9+/| -_|;
    while ( $str =~ /(.{1,60})/gs ) {
        my $len = chr( 32 + length($1) * 3 / 4 );
        $res .= unpack( "u", $len . $1 );
    }
    return $res;
}

sub printlog {
    my ( $text, $format ) = @_;
    if ( !$format ) {
        print "$text" unless $silent;
        print $RebuildLog "$text";
    }
    if ($format) {
        printf "$text", $format unless $silent;
        printf $RebuildLog "$text", $format;
    }
    return;
}
sub timestring {
    my ($time,$what) = @_;
    my @m = $time ? localtime($time) : localtime();
    my $tstr = $time ? scalar(localtime($time)) : scalar(localtime());
    my ($day,$month) = $tstr =~ /(...) (...)/;
    my $format = $LogDateFormat;
    if (lc $what eq 'd') {   # date only - remove time part from format
        $format =~ s/[^YMD]*(?:hh|mm|ss)[^YMD]*//g;
    } elsif (lc $what eq 't') { # time only - remove date part from format
        $format =~ s/[^hms]*(?:Y{2,4}|M{2,3}|D{2,3})[^hms]*//g;
    }
    $format =~ s/^[^YMDhms]//;
    $format =~ s/[^YMDhms]$//;
    $format =~ s/YYYY/sprintf("%04d",$m[5]+1900)/e;
    $format =~ s/YY/sprintf("%02d",$m[5]-100)/e;
    $format =~ s/MMM/$month/;
    $format =~ s/MM/sprintf("%02d",$m[4]+1)/e;
    $format =~ s/DDD/$day/e;
    $format =~ s/DD/sprintf("%02d",$m[3])/e;
    $format =~ s/hh/sprintf("%02d",$m[2])/e;
    $format =~ s/mm/sprintf("%02d",$m[1])/e;
    $format =~ s/ss/sprintf("%02d",$m[0])/e;

    return $format;
}


sub uploadgriplist {
    local $/ = "\n";

    #&printlog("Start building Griplist \n");
    open( my $FLogFile, '<', "$Log" ) || &printlog("Unable to create Griplist.\n unable to open logfile '$Log': $!\n");
    my ( $date, $ip, $ipnet, %m, %ok, %locals, $match, $peeraddress, $connect, $day, $gooddays, $st );
    my $buf;
	my $iday;
    #build list of the last 2 days
    my $time = Time::HiRes::time();
    my $dayoffset = $time % ( 24 * 3600 );
    
    for ( my $i = 0 ; $i < 2 ; $i++ ) {
        $gooddays .= '|' if ( $i > 0 );
        $day = localtime( $time - $i * 24 * 3600 );
        $day =~ s/^... (...) +(\d+) (\S+) ..(..)/$1-$2-$4/;
        $gooddays .= $day;
    }
    if ($LogDateFormat !~ /MMM-DD-YY/)  {
	$gooddays .= '|';
    for ( my $i = 0 ; $i < 2 ; $i++ ) {
        $gooddays .= '|' if ( $i > 0 );
        $day = &timestring( $time - $i * 24 * 3600 , 'd');
        $gooddays .= $day;
    }}

   
    undef $day;
    %locals = ( '127', 1, '10', 1, '192.168', 1, '169.254', 1, '::1', 1, 'fe80:', 1 );    #RFC 1918, IPv6
    for ( 16 .. 31 ) { $locals{ "172.$_" } = 1 }                                          #RFC 1918

    while (<$FLogFile>) {
        next unless ( $date, $ip, $match ) = /($gooddays) .*\s([0-9a-f\.:]+) .* to: \S+ (.*)/io;
	$ipnet = $ip;
	if ($ipnet =~ /:.*:/) {
		$ipnet =~ s/:.*/:/ if ($ipnet !~ /^:/);
        	next if $locals{ $ipnet };		# ignore local IP ranges
	}
	else {
		$ipnet =~ s/^(\d+\.\d+)\..*/$1/;
        	next if $locals{ $ipnet };		# ignore local IP ranges
		$ipnet =~ s/^(\d+)\..*/$1/;
        	next if $locals{ $ipnet };		# ignore local IP ranges
	}
	$ipnet = $ip;
	if ($ipnet =~ /:.*:/) {
		$ipnet =~ s/^([0-9a-f]+:[0-9a-f]+:[0-9a-f]+:[0-9a-f]*:).*/$1/i;	# yes: "+++*" so as to allow "2001:123:456::"
	}
	else {
		$ipnet =~ s/(\d+)\.(\d+)\.(\d+)\.(\d+)/$1.$2.$3/;
	}

        if (m/(\[Local]|\[MessageOK]|\[RWL]|\[Whitelisted])|\[NoProcessing]/i) {

            #Good IP
            $m{ $ipnet }  += 1;
            $ok{ $ipnet } += 1;
            next;
        }
        if (m/(Connection idle for|\[Backscatter]|\[Bayesian]|\[BlackDomain]|\[BlackHELO]|\[BombBlack]|\[BombData]|\[BombHeader]|\[BombRaw]|\[BombScript]|\[BombSender]|\[Collect]|\[Connection]|\[DNSBL]|\[DenyIP]|\[DenyStrict]|\[Extreme]|\[ForgedHELO]|\[ForgedLocalSender]|\[FromMissing]|\[IPfrequency]|\[IPperDomain]|\[InvalidHELO]|\[MalformedAddress]|\[MaxErrors]|\[MessageScore]|\[MissingMX]|\[MsgID]|\[OversizedHeader]|\[PTRinvalid]|\[PTRmissing]|\[PenaltyBox]|\[Penalty]|\[RelayAttempt]|\[SpoofedSender]|\[Trap]|\[URIBL]|\[VIRUS]|\[ValidHELO]|spam found|\[blocked\])/i) {

            #Bad IP
            $m{ $ipnet }  += 1;
            $ok{ $ipnet } += 0;
            next;
        }
    }
    close $FLogFile;
    if ( !%m ) {
        &printlog( "Skipping Griplist upload. Not enough messages processed.\n");
        return;
    }
    &printlog("Preparing binary Griplist upload...");
    my $n6 = 0;
    my $n4 = 0;
    my ($buf6, $buf4);
    foreach (keys %m) {
        next if (!$m{$_});
        if ($_ =~ /:/) {
            my $ip = $_;
            $ip =~ s/([0-9a-f]*):/0000$1:/gi;
            $ip =~ s/0*([0-9a-f]{4}):/$1:/gi;
            $buf6 .= pack("H4H4H4H4", split(/:/, $ip));
            $buf6 .= pack("C", (1 - $ok{$_} / $m{$_}) * 255);
            $n6++;
        } else {
            $buf4 .= pack("C3C", split(/\./, $_), (1 - $ok{$_} / $m{$_}) * 255);
            $n4++;
        }
    }
    $st = pack("N2", $n6 / 2**32, $n6);
    $st .= pack("N", $n4);
    $st .= $buf6 . $buf4;
    &printlog(" done\n");
    if ($proxyserver) {
        &printlog("Uploading Griplist via Proxy: $proxyserver\n");
        $peeraddress = $proxyserver;
         $connect     = "POST http://assp.sourceforge.net/cgi-bin/assp_griplist?binary HTTP/1.0";
    }
    else {
        &printlog("Uploading Griplist via Direct Connection\n");
        $peeraddress = "assp.sourceforge.net:80";
        $connect     = <<"EOF";
POST /cgi-bin/assp_griplist?binary HTTP/1.1
User-Agent: rebuildspamdb/$VERSION ($^O; Perl/$];)
Host: assp.sourceforge.net
EOF
    }
    my $socket = new IO::Socket::INET( Proto => 'tcp', PeerAddr => $peeraddress, Timeout => 2 );
    if ( defined $socket ) {
        my $len = length($st);
        $connect .= <<"EOF";
Content-Type: application/x-www-form-urlencoded
Content-Length: $len

$st
EOF
        print { $socket } $connect;
        $socket->sysread($buf, 4096);
        $socket->close;
        &printlog("Submitted $len bytes: $n6 IPv6 addresses, $n4 IPv4 addresses\n");
    }
    else {
        &printlog("unable to connect to assp.sourceforge.net to upload griplist\n");
        return;
    }
    return;
} ## end sub uploadgriplist

sub downloadgriplist {
    &printlog("Griplist download disabled\n")  if $noGriplist;
    return if $noGriplist;
    &printlog("Griplist file not configured\n")  if (!$griplist);
    return if (!$griplist);

    my $rc;

    my $gripListUrl = "http://assp.sourceforge.net/cgi-bin/assp_griplist?binary";
    my $gripFile    = "$base/$griplist";

    ## let's check if we really need to
    if (-e $gripFile) {
        my @s     = stat($gripFile);
        my $mtime = $s[9];
        if (time - $mtime < 8*60*60) {
            &printlog("Griplist download failed: last download too recent\n");
            return;
        }
    }

    # check for previous download timestamp, so we can do delta now
    my %lastdownload;
    $lastdownload{full} = 0;
    $lastdownload{fullUTC} = 0;
    $lastdownload{delta} = 0;
    $lastdownload{deltaUTC} = 0;
    my $delta = "";
    if (open(UTC, "$gripFile.utc")) {
        local $/;
        my $buf = <UTC>;
        close(UTC);
        chop($buf);
        if ($buf =~ /full/ && $buf =~ /delta/) {
            %lastdownload = split(/\s+|\n/, $buf);
        } else {
            $lastdownload{delta} = $buf;
        }
        if (! ($DoFullGripDownload && time - $lastdownload{fullUTC} > $DoFullGripDownload*24*60*60)) {
            my $lasttime;
            $lasttime = $lastdownload{full};
            $lasttime = $lastdownload{delta} if ($lastdownload{delta} > $lastdownload{full});
            $gripListUrl .= "&delta=$lasttime";
            $delta = " (delta)";
        }
    }

    if (!$CanUseLWP) {
        &printlog("Griplist download failed: LWP::Simple Perl module not available\n");
        return;
    }

    if (open(TEMPFILE, ">", "$gripFile.tmp")) {
        #we can create the file, this is good, now close the file and keep going.
        close TEMPFILE;
        unlink("$gripFile.tmp");
    } else {
        &printlog("Griplist download failed: Cannot create $gripFile.tmp\n");
        return;
    }

    # Create LWP ogject
    use LWP::Simple qw(mirror is_success status_message $ua);

    # Set useragent to Rebuild version
    $ua->agent("rebuildspamdb/$VERSION ($^O; Perl/$]; LWP::Simple/$LWP::VERSION)");
    $ua->timeout(20);
    if ($proxyserver) {
        $ua->proxy('http', "http://" . $proxyserver);
        &printlog("Downloading Griplist$delta via HTTP proxy: $proxyserver\n");
    } else {
        &printlog("Downloading Griplist$delta via direct HTTP connection\n");
    }

    # call LWP mirror command
    my $dltime = time;
    $rc = mirror($gripListUrl, "$gripFile.tmp");

    if ($rc == 304) {
        # HTTP 304 not modified status returned
        # can't happen - we ALWAYS get new data
        unlink("$gripFile.tmp");
        return;
    } elsif (!is_success($rc)) {
        # download failed-error code output to logfile
        # &printlog("Griplist download failed: $rc " . status_message($rc). "\n");
        unlink("$gripFile.tmp");
        return;
    }

    # download complete
    my $filesize = -s "$gripFile.tmp";
    &printlog("Griplist download complete: binary download $filesize bytes\n");

    # enough data?
    if ($filesize < 12) {
        &printlog("Griplist download error: grip data too small\n");
        unlink("$gripFile.tmp");
        return;
    }

    # record download time so we can do delta next time
    unlink("$gripFile.utc");
    if (open(UTC, ">$gripFile.utc")) {
        my ($sec, $min, $hour, $day, $mon, $year, $wday, $yday, $isdst) = gmtime($dltime);
        $year += 1900;
        $mon += 1;
        if (! $delta) {
            $lastdownload{full} = sprintf "%04d%02d%02d%02d%02d%02d", $year, $mon, $day, $hour, $min, $sec;
            $lastdownload{fullUTC} = $dltime;
        } else {
            $lastdownload{delta} = sprintf "%04d%02d%02d%02d%02d%02d", $year, $mon, $day, $hour, $min, $sec;
            $lastdownload{deltaUTC} = $dltime;
        }
        printf UTC "full\t%s\n", $lastdownload{full};
        printf UTC "fullUTC\t%s\n", $lastdownload{fullUTC};
        printf UTC "delta\t%s\n", $lastdownload{delta};
        printf UTC "deltaUTC\t%s\n", $lastdownload{deltaUTC};
        close(UTC);
    }

    # if we did a delta download, read in previous data so we can merge
    my @binFiles;
    push(@binFiles, "$gripFile.bin") if ($gripListUrl =~ /delta=/);
    push(@binFiles, "$gripFile.tmp");

    # convert binary download form to text form used by ASSP
    my $buf;
    my %grip;
    my $action = "read";
    foreach my $binF (@binFiles) {
        my $binSize = -s $binF;
        open(BIN, $binF);   
        binmode(BIN);
        read(BIN, $buf, $binSize);
        close(BIN);

    # IPv6 count
    	my ($n6h, $n6l) = unpack("N2", $buf);
    	my $n6 = $n6h * 2**32 + $n6l;

    # IPv4 count
    	my $n4;
    	eval { $n4 = unpack("x[N2] N", $buf); };


    # decode IPv6 data
    	my $x6 = 0;
    	eval {
    	for (my $i = 0; $i < $n6; $i++) {
        my ($bip, $grey) = unpack("x[N2] x[N] x$x6 a8 C", $buf);
        my $ip = join(":", unpack("H4H4H4H4", $bip)) . ":";
        $ip =~ s/:0+([0-9a-f])/:$1/gio;
        $ip =~ s/:0:$/::/o;

        #                $grip{$ip} = $grey / 255;
        #                $gripdelta{$ip} = $grey / 255 if $deltayonly;
        $x6 += 9;
    	}
    	};

    # decode IPv4 data
    	my $x4 = 0;
    	for (my $i = 0; $i < $n4; $i++) {
        my ($bip, $grey) = unpack("x[N2] x[N] x$x6 x$x4 a3 C", $buf);
        my $ip = join(".", unpack("C3", $bip));
        $grip{$ip} = $grey / 255;

        $x4 += 4;
    }
        &printlog("Griplist binary $action OK: $binF, $n6 IPv6 addresses, $n4 IPv4 addresses\n");
        $action = "merge";
    }

    # remove download file
    unlink("$gripFile.tmp");

    # output binary version, so we can do a delta next time
    &printlog("Writing merged Griplist binary...");
    my $buf;
    my $n6 = 0;
    my $n4 = 0;
    my ($buf6, $buf4);
    foreach my $ip (keys %grip) {
        if ($ip =~ /:/) {
            my $ip2 = $ip;
            $ip2 =~ s/([0-9a-f]*):/0000$1:/gi;
            $ip2 =~ s/0*([0-9a-f]{4}):/$1:/gi;
            $buf6 .= pack("H4H4H4H4", split(/:/, $ip2));
            $buf6 .= pack("C", int($grip{$ip} * 255));
            $n6++;
        } else {
            $buf4 .= pack("C3C", split(/\./, $ip), int($grip{$ip} * 255));
            $n4++;
        }
    }
    $buf = pack("N2", $n6/2**32, $n6);
    $buf .= pack("N", $n4);
    $buf .= $buf6 . $buf4;
    unlink("$gripFile.bin");
    open (BIN, ">$gripFile.bin");
    binmode(BIN);
    print BIN $buf;
    close(BIN);
    chmod 0644, "$gripFile.bin";
    &printlog(" done\n");

    # output text version
    &printlog("Writing merged Griplist text...");
    unlink("$gripFile");
    open (TEXT, ">$gripFile");
    binmode(TEXT);
    print TEXT "\n";
    foreach my $ip (sort keys %grip) {

        printf TEXT "$ip\002%.2f\n", $grip{$ip};
    }
    close(TEXT);
    chmod 0644, "$gripFile";
    &printlog(" done\n");

    &printlog("Griplist writing complete: $n6 IPv6 addresses, $n4 IPv4 addresses\n\n");
}


sub downloaddroplist {

	&printlog("Droplist download disabled\n\n")  if !$DoDropList;
	return if !$DoDropList;
    my $rc;

    my $droplistUrl = "http://www.spamhaus.org/drop/drop.lasso";
    my $dropFile     = "$base/$DropList";

    $dropFile		 = &fixPath($dropFile);

    # let's check if we really need to
    if (-e $dropFile) {
        my @s     = stat($dropFile);
        my $mtime = $s[9];
        if (time - $mtime < 8*60*60) {
            &printlog("Droplist download failed: last download too recent\n");
            return;
        }
    }

    if ( !$CanUseLWP ) {
        &printlog("Droplist download failed: LWP::Simple Perl module not available\n");
        return;
    }

    if ( -e $dropFile  ) {
        if ( !-r $dropFile  ) {
            &printlog( "Droplist download failed: $dropFile  not readable!\n" );
            return;
        } elsif ( !-w $dropFile  ) {
            &printlog( "Droplist download failed: $dropFile  not writable!\n" );
            return;
        }
    }
    else {
        if ( open( TEMPFILE, ">", $dropFile ) ) {
            #we can create the file, this is good, now close the file and keep going.
            close TEMPFILE;
	    unlink($dropFile);
        } else {
            &printlog("Droplist download failed: Cannot create $dropFile \n" );
            return;
        }
    }

    # Create LWP ogject
    use LWP::Simple qw(mirror is_success status_message $ua);

    # Set useragent to Rebuild version
    $ua->agent(
        "rebuildspamdb/$VERSION ($^O; Perl/$]; LWP::Simple/$LWP::VERSION)");
    $ua->timeout(20);
    if ($proxyserver) {
        $ua->proxy( 'http', "http://" . $proxyserver );
        &printlog("Downloading $dropFile via HTTP proxy: $proxyserver\n" );
    } else {
        &printlog("Downloading $dropFile via direct HTTP connection\n" );
    }

    # call LWP mirror command
    $rc = mirror( $droplistUrl, $dropFile );

    if ( $rc == 304 ) {
        # HTTP 304 not modified status returned
        return;
    } elsif ( !is_success($rc) ) {
        #download failed-error code output to logfile
        &printlog("$dropFile download failed: $rc " . status_message($rc). "\n" );
        return;
    } elsif ( is_success($rc) ) {
        # download complete
        my $filesize = -s "$dropFile";
        &printlog("$dropFile download complete: $filesize bytes\n" );
        chmod 0644, "$dropFile";
    }
}
sub compileregex {
    use re 'eval';
    my ( $name, $contents, $REname ) = @_;
    $contents = getrecontent( $contents, $name );
    $contents ||= '^(?!)';    # regexp that never matches

    # trim long matches to 32 chars including '...' at the end
    eval { $$REname = qr/(?si)$contents/ };
    if ($EVAL_ERROR) { print "regular expression error in '$contents' for $name: $@\n"; }
    return q{};
}

sub optionList {

    # this converts a | separated list into a RE
    my ( $d, $configname ) = @_;
    $d = getrecontent( $d, $configname );
    $d =~ s/([\.\[\]\-\(\)\*\+\\])/\\$1/g;
    return $d;
}

sub fixPath {
    my ($path) = @_;
    my $len = length($path);
    if   ( !substr( $path, ( $len - 1 ), 1 ) eq q{/} ) { return $path . q{/}; }
    else                                               { return $path; }
    return;
}

sub repair {
    $/ = "\n";

    # mxa ptr rbl spf uribl white black
    my $pbdb = "$base/$pbdbfile";
    my ( @files, %w );
    my ( $k,     $v );
    if ( !( $pbdbfile =~ /mysql/ ) ) {
        foreach ( glob("$pbdb.*.db") ) { push( @files, $_ ); }
    }
    if ( !( $whitelistdb =~ /mysql/ ) ) { push( @files, $whitelistdb ); }
    if ( !( $redlistdb   =~ /mysql/ ) ) { push( @files, $redlistdb ); }
    foreach my $f (@files) {
        if ( !-e $f ) { next }
        open( my $curfile, "<", $f );

        #<$curfile>;
        while (<$curfile>) {
            ( $k, $v ) = split( /[\001\002\n]/, $_ );
            if ( $k eq q{} || $v eq q{} ) { next }

            #print "$k=$v\n";
            $w{ $k } = $v;
        }
        close $curfile;
        open( my $newfile, ">", "$f.new" );
        binmode $newfile;
        print { $newfile } "\n";
        for ( sort keys %w ) { print { $newfile } "$_\002$w{$_}\n"; }
        close $newfile;
        rename( $f, "$f.bak" );
        rename( "$f.new", $f );
        undef %w;
    }
    return;
} ## end sub repair



sub sendNotification {
    my ($from,$to,$sub,$body,$file) = @_;
    my $text;
    return unless $to;
    return unless $resendmail;    
    my $date=$UseLocalTime ? localtime() : gmtime();
    my $tz=$UseLocalTime ? tzStr() : '+0000';
    $date=~s/(\w+) +(\w+) +(\d+) +(\S+) +(\d+)/$1, $3 $2 $5 $4/;
    $text = "Date: $date $tz\r\n";
    $text .= "X-Assp-Notification: YES\r\n";
  	$text .= "From: <$from>\r\nTo:" if $from !~ /\</;
    $text .= "From: $from\r\nTo:" if $from =~ /\</;

    foreach (split(/\|/, $to)) {
        $text .= " <$_>,";
    }
    chop $text;
    $text .= "\r\n";
    $text .= "Subject: $sub\r\n";
    $text .= "Content-Type: text/plain;	charset=\"ISO-8859-1\"\r\n";
    $text .= "Content-Transfer-Encoding: 7bit\r\n";
    my $msgid = int(rand(1000000));
    $text .= "Message-ID: a$msgid\@$myName\r\n";
    $text = &headerWrap($text);
    $text .= "\r\n";           # end header
    foreach (split(/\r?\n/,$body)) {
        $text .= "$_\r\n";
    }
   	
    my $f;
    if ($file && -e $file && open($f,"<",$file)) {
    	
        while (<$f>) {        	 	
             s/\r?\n//g;
             $text .= "$_\r\n";
        
        }
        close $f;
    }

    $text .= ".\r\n";
    $text =~ tr/\x80-\xFF/_/;   # 7bit only
    my $rfile = "$base/$resendmail/n$msgid$maillogExt";
	-d "$base/$resendmail" or mkdir "$base/$resendmail", 0777;
    if (open($f,">",$rfile)) {
        binmode $f;
        print $f $text;
        close $f;
        &printlog( "write notify message to $rfile\n" );        
    } else {

        &printlog( "error: unable to write notify message to $rfile - $!\n" );
    }
   
}
sub tzStr {

    # calculate the time difference in minutes
    my $minoffset =
      ( Time::Local::timelocal( localtime() ) -
          Time::Local::timelocal( gmtime() ) ) / 60;

   # translate it to "hour-format", so that 90 will be 130, and -90 will be -130
    my $sign = $minoffset < 0 ? -1 : +1;
    $minoffset = abs($minoffset) + 0.5;
    my $tzoffset = 0;
    $tzoffset = $sign * ( int( $minoffset / 60 ) * 100 + ( $minoffset % 60 ) )
      if $minoffset;

    # apply final formatting, including +/- sign and 4 digits
    return sprintf( "%+05d", $tzoffset );
}
# wrap long headers
sub headerWrap {
    my $header = shift;
    $header =~
s/(?:([^\r\n]{60,75}?;)|([^\r\n]{60,75}) ) {0,5}(?=[^\r\n]{10,})/$1$2\r\n\t/g;

    return $header;
}
sub cleanUpFiles {
    my ($folder, $filter, $filetime) = @_;

    my $textfilter = " (*$filter)" if $filter;
    my @files;
    my $file;
    my $count;
    my $dir = &fixPath($folder); 
    $dir =~ s/\\/\//g;
    return unless -e $dir;
    &printlog( "starting cleanup old files$textfilter for folder $dir\n" );
        
    opendir(my $DIR,"$dir");
    @files = readdir($DIR);
    close $DIR;
	my $fldrpath        = $dir . "/*"; 

    my $filecount = &countfiles($fldrpath);
    foreach $file (@files) {
        next if $file eq '.';
        next if $file eq '..';
        next unless $file =~ /$maillogExt$/i or $file =~ /\.rpt$/i;
        next if ($filter && $file !~ /$filter$/i);
        next if ($filter && $file =~ /^$filter$/i);
        $file = "$dir/$file";
        next if -d $file;
        next unless -w $file;
        my $dtime=(stat($file))[9]-time;
        if (($dtime < $filetime * -1) or ($dtime > 0 && $dtime < $MaxKeepDeleted - $filetime)) {
            unlink $file;
            $count++;

        }
    }

    my $filecountafter = &countfiles($fldrpath);
    &printlog( "folder $dir before: $filecount\n" ) ;
	&printlog( "folder $dir deleted: $count\n" ) if $count;
	&printlog( "folder $dir after: $filecountafter\n\n" ) ;
}

sub cleanUpMaxFiles {
    my $folder = shift;
    my $percent = shift;

    my @files;
    my $file;
    my $count;
    my $info;
    my $dir = ($folder !~ /\Q$base\E/i) ? "$base/$folder" : $folder ;
    $dir =~ s/\\/\//g;
    return unless -e $dir;


   

    opendir(my $DIR,"$dir");
    @files = readdir($DIR);
    close $DIR;
    my $filecount = @files - 2;
    printlog("cleaning $dir skipped - number of files($filecount) <= MaxFiles($MaxFiles)\n") if (! $percent && $filecount <= $MaxFiles);
    return $info if (! $percent && $filecount <= $MaxFiles);
    
    my %filelist = ();
    while (@files) {
        $file = shift @files;
        next if $file eq '.';
        next if $file eq '..';
        $file = "$dir/$file";
        if (-d $file) {
            $filecount--;
            next;
        }
        my $ft = (stat($file))[9];
        $ft = $ft - (60 * 24 * 3600) if $ft > time;
        while (exists $filelist{$ft}) {
            $ft++;
        }
        $filelist{$ft} = $file;
        $count++;

    }
    return $info if (! $percent && $filecount <= $MaxFiles);
    my $minfiles = $minimumfiles;    # keep at least 10000 files in the folder
    return $info if $percent && $filecount < $minfiles;
    printlog("\nstarting auto correction for corpus - delete files from $dir\n") if $percent;
    printlog("\nstarting cleaning $dir - delete files from $dir\n") if !$percent;
    my $toFilenumber;
    my $filenum;
    my $time = time - ($minimumdays * 24 * 3600);   # two weeks ago
	my $savecount;
    if ($percent) {
 
        $filenum = int($filecount * $percent);
        printlog("starting to delete $filenum files from $filecount in $dir\n") if $percent;
        $savecount = $filecount;
        $filenum = $filecount - $minfiles if $filecount - $filenum < $minfiles;
       
        
        $toFilenumber = $filecount - ($filenum-1);
     

        
    } else {
        $filenum = $MaxFiles - $filecount;
        $toFilenumber = $MaxFiles;
    }
    $count = 0;
    
    foreach my $filetime (sort keys %filelist) {
        last if $filecount-- < $toFilenumber;

        last if $percent && $filetime > $time;
        unlink "$filelist{$filetime}";
        $count++;

    }
    $savecount -= $count;
	my $toonew = $filenum - $count;
    printlog("finished auto correction for $dir - $toonew skipped because below minimum days, deleted $count, new: $savecount\n") if $percent;
 
    printlog("finished cleaning $dir - $count deleted: $count, new: $filecount\n") if !$percent;

    return $info;
}


sub cleanUpCollection {
	my $age = $MaxNoBayesFileAge * 3600 * 24;
    my @dirs = ('incomingOkMail','discarded','viruslog');
    my $dir;

 
    	&printlog( "\n--- Cleaning NoBayesian folders ---\n" );
    	&printlog( "entries older than $MaxNoBayesFileAge days will be removed\n" ) if $MaxNoBayesFileAge;

    foreach my $dir (@dirs) {
        if ($age) {
            &cleanUpFiles(${$dir},'',$age) if ${$dir};
        } 
    }
    
    $age = $MaxCorrectedDays * 3600 * 24;
    @dirs = ('correctedspam','correctednotspam');
    
    &printlog( "\n--- Cleaning corrected spam/notspam folders ---\n" );
    &printlog( "entries older than $MaxCorrectedDays days will be removed\n" ) if $MaxCorrectedDays;

    foreach my $dir (@dirs) {
        if ($age) {
            &cleanUpFiles(${$dir},'',$age) if ${$dir};
        } else {
            &cleanUpMaxFiles(${$dir}) if ${$dir};
        }
    }

    return unless $maintbayescollection;
    $age = 0;
    @dirs = ('spamlog','notspamlog');
    
    &printlog( "\n--- Cleaning Bayesian folders ---\n" );
    &printlog( "entries older than $MaxBayesFileAge days will be removed\n" ) if $MaxBayesFileAge;

    foreach my $dir (@dirs) {
        if ($age) {
            &cleanUpFiles(${$dir},'',$age) if ${$dir};
        } else {
            &cleanUpMaxFiles(${$dir}) if ${$dir};
        }
    }
}
sub cleanTrashlist {
    my $addresses_before = my $addresses_deleted = 0;
    my $t = time;
    my $mcount;

    while ( my ( $k, $v ) = each(%Trashlist) ) {

        my $ct = $v;
        $addresses_before++;

        if (  $t - $ct >= $MaxKeepDeleted * 3600 * 24
            )
        {
        	deletefile ($k,"Trashlist",1);
            delete $Trashlist{$k};
            $addresses_deleted++;
        }
    }
    &printlog(
"\nTrashlist: cleaning finished; before=$addresses_before, deleted=$addresses_deleted\n"
    ) if $addresses_before>0 ;
   
}
sub getUidGid {
    my ( $uname, $gname ) = @_;

    my $rname = "root";
    eval('getgrnam($rname);getpwnam($rname);');
    if ($@) {

        # windows pukes "unimplemented" for these -- just skip it
        printlog(
"warning:   uname and/or gname are set ($uname,$gname) but getgrnam / getpwnam give errors: $@"
        );
        return;
    }
    my $gid;
    if ($gname) {
        $gid = getgrnam($gname);
        if ( defined $gid ) {
        } else {
            my $msg =
"could not find gid for group '$gname' -- not switching effective gid ";
            printlog( $msg );
            return;
        }
    }
    my $uid;
    if ($uname) {
        $uid = getpwnam($uname);
        if ( defined $uid ) {
        } else {
            my $msg =
"could not find uid for user '$uname' -- not switching effective uid ";
            printlog( $msg );
            return;
        }
    }
    ( $uid, $gid );
}

sub mlog {
}
sub switchUsers {
    my ( $uid, $gid ) = @_;

    my ( $uname, $gname ) = ( $runAsUser, $runAsGroup );
    $> = 0;
    if ( $> != 0 ) {
        my $msg =
"requested to switch to user/group '$uname/$gname' but cannot set effective uid to 0 --  uid is $>";
        printlog( $msg );
        return;
    }
    $< = 0;
    if ($gid) {
        $) = $gid;
        if ( $) + 0 == $gid ) {
            printlog( "\nswitched effective gid to $gid ($gname)\n" );
        } else {
            my $msg =
"failed to switch effective gid to $gid ($gname) -- effective gid=$) ";
            printlog( $msg );
            return;
        }
        $( = $gid;
        if ( $( + 0 == $gid ) {
            printlog( "\nswitched real gid to $gid ($gname)\n" );
        } else {
            printlog(
                "failed to switch real gid to $gid ($gname) -- real uid=$(" );
                return;
        }
    }
    if ($uid) {

        # do it both ways so linux and bsd are happy
        $< = $> = $uid;
        if ( $> == $uid ) {
            printlog( "\nswitched effective uid to $uid ($uname)\n" );
        } else {
            my $msg =
"failed to switch effective uid to $uid ($uname) -- real uid=$<";
            printlog( $msg );
            return;
        }
        if ( $< == $uid ) {
            printlog( "\nswitched real uid to $uid ($uname)\n" );
        } else {
            printlog(
                "failed to switch real uid to $uid ($uname) -- real uid=$<" );
                return
        }
    }
}
#####################################################################################
#                orderedtie
{

    package orderedtie;

    # This is a tied value that caches lookups from a sorted file; \n separates records,
    # \002 separates the key from the value. After OrderedTieHashSize lookups the cache is
    # cleared. This give us most of the speed of the hash without the huge memory overhead of storing
    # the entire hash and should be totally portable. Picking the best value for n requires some
    # tuning. A \n is required to start the file.
    # if you're updating entries it behoves you to call flush every so often to make sure that your
    # changes are saved. This also frees the memory used to remember updated values.
    # for my purposes a value of undef and a nonexistant key are the same
    # Obviosly if your keys or values contain \n or \002 it will totally goof things up.
    sub TIEHASH {
        my ( $c, $fn ) = @_;
        my $self = { fn => $fn, age => mtime($fn), cnt => 0, cache => {}, updated => {}, ptr => 1, };
        bless $self, $c;
        return $self;
    }
    sub DESTROY { $_[0]->flush(); }
    sub mtime { my @s = stat( $_[0] ); $s[9]; }

    sub flush {
        my $this = shift;
        return unless %{ $this->{ updated } };
        my $f = $this->{ fn };
        open( O, '>', "$f.tmp" ) || return;
        binmode(O);
        open( I, '<', "$f" ) || print O"\n";
        binmode(I);
        local $/ = "\n";
        my @l = ( sort keys %{ $this->{ updated } } );
        my ( $k, $d, $r, $v );

        while ( $r = <I> ) {
            ( $k, $d ) = split( "\002", $r );
            while ( @l && $l[0] lt $k ) {
                $v = $this->{ updated }{ $l[0] };
                print O"$l[0]\002$v\n" if $v;
                shift(@l);
            }
            if ( $l[0] eq $k ) {
                $v = $this->{ updated }{ $l[0] };
                print O"$l[0]\002$v\n" if $v;
                shift(@l);
            }
            else { print O$r; }
        }
        while (@l) {
            $v = $this->{ updated }{ $l[0] };
            print O"$l[0]\002$v\n" if $v;
            shift(@l);
        }
        close I;
        close O;
        unlink($f);
        rename( "$f.tmp", $f );
        $this->{ updated } = {};
    } ## end sub flush

    sub STORE {
        my ( $this, $key, $value ) = @_;
        $this->{ cache }{ $key } = $this->{ updated }{ $key } = $value;
    }

    sub FETCH {
        my ( $this, $key ) = @_;
        return $this->{ cache }{ $key } if exists $this->{ cache }{ $key };
        $this->resetCache()
            if ( $this->{ cnt }++ > 10000
            || ( $this->{ cnt } & 0x1f ) == 0 && mtime( $this->{ fn } ) != $this->{ age } );
        return $this->{ cache }{ $key } = binsearch( $this->{ fn }, $key );
    }

    sub resetCache {
        my $this = shift;
        $this->{ cnt }   = 0;
        $this->{ age }   = mtime( $this->{ fn } );
        $this->{ cache } = { %{ $this->{ updated } } };
    }

    sub binsearch {
        my ( $f, $k ) = @_;
        open( F, '<', "$f" ) || return;
        binmode(F);
        my $siz = my $h = -s $f;
        $siz -= 1024;
        my $l  = 0;
        my $k0 = $k;
        $k =~ s/([\[\]\(\)\*\^\!\|\+\.\\\/\?\`\$\@\{\}])/\\$1/g;    # make sure there's no re chars unqutoed in the key

        #print "k=$k ($_[1])\n";
        while (1) {
            my $m = ( ( $l + $h ) >> 1 ) - 1024;
            $m = 0 if $m < 0;

            #print "($l $m $h) ";
            seek( F, $m, 0 );
            my $d;
            my $read = read( F, $d, 2048 );
            if ( $d =~ /\n$k\002([^\n]*)\n/ ) {
                close F;

                #print "got $1\n";
                return $1;
            }
            my ( $pre, $first, $last, $post ) = $d =~ /^(.*?)\n(.*?)\002.*\n(.*?)\002.*?\n(.*?)$/s;

            #print "f=$first ";
            last unless defined $first;
            if ( $k0 gt $first && $k0 lt $last ) {

                #print "got miss\n";
                last;
            }
            if ( $k0 lt $first ) {
                last if $m == 0;
                $h = $m - 1024 + length($pre);
                $h = 0 if $h < 0;
            }
            if ( $k0 gt $last ) {
                last if $m >= $siz;
                $l = $m + $read - length($post);
            }

            #print "l=$l h=$h ";
        } ## end while (1)
        close F;
        return;
    } ## end sub binsearch

    sub FIRSTKEY {
        my $this = shift;
        $this->flush();
        $this->{ ptr } = 1;
        $this->NEXTKEY();
    }

    sub NEXTKEY {
        my ( $this, $lastkey ) = @_;
        local $/ = "\n";
        open( F, '<', "$this->{fn}" ) || return;
        binmode(F);
        seek( F, $this->{ ptr }, 0 );
        my $r = <F>;
        return unless $r;
        $this->{ ptr } = tell F;
        close F;
        my ( $k, $v ) = $r =~ /(.*?)\002(.*?)\n/s;

        if ( !exists( $this->{ cache }{ $k } ) && $this->{ cnt }++ > 10000 ) {
            $this->{ cnt }   = 0;
            $this->{ cache } = { %{ $this->{ updated } } };
        }
        $this->{ cache }{ $k } = $v;
        $k;
    }

    sub EXISTS {
        my ( $this, $key ) = @_;
        return FETCH( $this, $key );
    }

    sub DELETE {
        my ( $this, $key ) = @_;
        $this->{ cache }{ $key } = $this->{ updated }{ $key } = undef;
    }

    sub CLEAR {
        my ($this) = @_;
        open( F, '>', "$this->{fn}" );
        binmode(F);
        print "\n";
        close F;
        $this->{ cache }   = {};
        $this->{ updated } = {};
        $this->{ cnt }     = 0;
    }
}

