#!/usr/bin/perl # file: razor-caching-proxy.pl # #Copyright 2002 William Stearns #Released under the GPL. # #FIXME - fork for multiple connections. #FIXME - on pipe signal, check to see which closed, and if so, reopen to server or gracefully finish client. #FIXME - allow cache_file and real_server_name to have multiple values: perldoc Getopt::Long, "Options with multiple values". #FIXME - pull default servers out of *.razor.vipul.net with list terminator syntax and test for reachability. #FIXME - Allow offline (no real server) mode. #FIXME - Process action:report messages. #FIXME - batch process up to '.' #FIXME - add seconds and records/second statistics use strict; use IO::Socket qw(:DEFAULT :crlf); use Socket; use IO::Handle; use IO::File; use Getopt::Long; use constant DEFAULT_RAZOR_PORT => 2702; use constant DEFAULT_CACHE_FILE => '/var/cache/razor/razor-cache'; use constant DEFAULT_REAL_SERVER_NAME => 'b.razor.vipul.net.'; use constant CACHE_VERSION => '0.1.1'; #Handle interrupt (Ctrl-C) between client connections my $quit = 0; $SIG{INT} = sub { $quit++ }; #Initial responses, we'll load additional responses from the command line specified cache file. #The Positive hash string listed below is from the sample spam included with the razor package. #This guarantees we'll always have at least one live spam signature to test against. my %responses = ( "cache_version" => "Razor Caching Server live, version " . CACHE_VERSION . ".", "key:d97b1764b257f4f67a7d7fc9e6988aace12c5a56&action:lookup" => "Positive d97b1764b257f4f67a7d7fc9e6988aace12c5a56" ); #Command line options: my $cache_file=DEFAULT_CACHE_FILE; my $listen_port = DEFAULT_RAZOR_PORT; my $real_server_name = DEFAULT_REAL_SERVER_NAME; my $real_server_port = DEFAULT_RAZOR_PORT; my $ignore_negative = 0; my $help = 0; my $verbosity = 0; #Other variables: my $missing_cache_warned = 0; my $client_requests = 0; my $cached_responses = 0; my $live_responses = 0; my $field_separator = '/'; #Don't use ':', '&', or alphanumeric as these are used in the actual queries and responses. #'/' and ',' seem to work fine, '|' does not. #Make sure you edit any cache files to use this new separator if you change this here. sub Debug { my $DebugLevel = shift; if ($verbosity >= $DebugLevel) { my $DebugString = shift; print STDERR "$DebugString"; } } my $USAGEMSG = < File that holds cached queries and responses ($cache_file) --listen_port Port this proxy should listen on for incoming connections. ($listen_port) --server_name Real server this proxy consults for uncached queries ($real_server_name) --server_name also legal (this :port overrides any --server_port) --server_port Port on which that server listens ($real_server_port) --ignore_negative Do not cache negative (non-spam) responses - see below. --help This help message --verbose Send debug output to stdout, use twice to show all loads and stores too. If you use "--ignore-negative", this proxy will not cache Negative responses ("the message with this hash is not a spam") from the server. It will continue to cache Positive responses ("the message with this hash _is_ a spam"). If you leave off this parameter, the proxy will cache both types of messages. This is the default. Examples: To test the proxy (enable the echo service on localhost first): razor-caching-proxy.pl --server_n=localhost:7 --ca=/tmp/razor-cache -v Specify an alternate real server and listen for incoming connections on 8000: razor-caching-proxy.pl --server_n=a.razor.vipul.net. --listen_port 8000 USAGE #Load command line params. die "$USAGEMSG" unless GetOptions( 'cache_file=s' => \$cache_file, 'listen_port=i' => \$listen_port, 'server_name=s' => \$real_server_name, 'server_port=i' => \$real_server_port, 'ignore_negative' => \$ignore_negative, 'help' => \$help, 'verbose+' => \$verbosity ); die "$USAGEMSG" if $help; #Split port number out if embedded in server name if ($real_server_name =~ /:/) { ($real_server_name, $real_server_port) = split(/:/, $real_server_name, 2); } #Load locally cached queries and responses from $cache_file if (my $cache_fh = IO::File->new($cache_file, O_RDONLY)) { # |O_CREAT removed, security risk my $cache_query; my $cache_response; my $loaded_pairs = 0; undef $!; #process one cache entry from local file. while (defined(my $cache_line = <$cache_fh>)) { chomp $cache_line; ($cache_query, $cache_response) = split(/$field_separator/, $cache_line, 2); Debug 2, "Read \"$cache_query,$cache_response\".\n"; #The folllowing test is equivalent to "Load this pair into %responses if the user allows negatives or the line isn't negative anyways." if ( not ( ($ignore_negative == 1) && ($cache_response =~ /^Negative /) ) ) { #look for already cached entries where the cached entry and new entry are different. if (defined $responses{$cache_query} && ($responses{$cache_query} ne $cache_response)) { #warn user of conflicts. #prefer positive over negative. if ($cache_response =~ /^Positive /) { #Replace existing response with Positive one Debug 1, "Duplicate for \"$cache_query\": \"$responses{$cache_query}\" replaced with Positive \"$cache_response\".\n"; $responses{$cache_query} = $cache_response; } elsif ($responses{$cache_query} =~ /^Positive /) { #Do nothing, keep existing Positive response Debug 1, "Duplicate for \"$cache_query\": keeping Positive \"$responses{$cache_query}\", ignoring \"$cache_response\".\n"; } else { #Default to new entry replacing old. Debug 1, "\"Duplicate for \"$cache_query\": $responses{$cache_query}\" replaced with newer \"$cache_response\".\n"; $responses{$cache_query} = $cache_response; } } elsif (defined $responses{$cache_query}) { #Do nothing; the current and new responses are equal. } else { #Non-existing entry - store it in responses array. $responses{$cache_query} = $cache_response; $loaded_pairs++; } } } close $cache_file; if ($ignore_negative) { Debug 1, "loaded $loaded_pairs non-negative unique cached queries from $cache_file.\n"; } else { Debug 1, "loaded $loaded_pairs unique cached queries from $cache_file.\n"; } } elsif ($missing_cache_warned == 0) { #Warn once about missing or unreadable cache file. Debug 0, "Local cache file $cache_file unavailable or unreadable (create it if it's not there and check permissions, please): $!\n"; $missing_cache_warned++; } #It appears the razor servers actually use 0x0a as the terminator for all queries and responses, so we use LF instead of CRLF. $/ = LF; #Input record separator #Setting up the server port so we can allow incoming connections. my $incoming_sock = IO::Socket::INET->new( Listen => 20, LocalPort => $listen_port, Timeout => 60*60, Reuse => 1) or die "Can't create listening socket: $!\n"; Debug 1, "Waiting for incoming connections on port $listen_port...\n"; while (!$quit) { next unless my $incoming_session = $incoming_sock->accept; my $peer_host = gethostbyaddr($incoming_session->peeraddr,AF_INET) || $incoming_session->peerhost; my $peer_port = $incoming_session->peerport; Debug 1, "Connection from [$peer_host,$peer_port]\n"; #FIXME - defer connection to real server until actually needed. Debug 2, "Opening connection to server.\n"; my $real_server_sock = IO::Socket::INET -> new( PeerAddr => $real_server_name, PeerPort => $real_server_port, Timeout => 60 ) or die "Connection to $real_server_name:$real_server_port failed: $!\n"; $real_server_sock->autoflush(1); Debug 1, "Connected to $real_server_name:$real_server_port.\n"; if ($real_server_port == 2702) { Debug 2, "Trying to grab banner.\n"; my $server_banner; #Grab the banner line from the real server. $/ = LF; $server_banner=<$real_server_sock>; chomp $server_banner; Debug 2, "Server sent back \"$server_banner\".\n"; #FIXME - check and modify behavior based on "protocol version N" in above banner. } #print $incoming_session "Vipul's Razor Cache " . CACHE_VERSION . ", protocol version 2." . LF; #Client gets fussy about server version string. Reminds me of Win 3.x refusing to load over DR-DOS... :-) print $incoming_session "Vipul's Razor 1.11, protocol version 2.\n"; $/ = LF; while (my $client_query=<$incoming_session>) { my $response; #DROPME #We need to strip off either or both of CR and LF. This gets CR, LF, or CRLF forms of line separator. $/ = LF; chomp $client_query; $/ = CR; chomp $client_query; $/ = LF; Debug 2, "-------- Query $client_query received.\n"; if ($client_query eq '.') { #End of queries. Now we process the responses sent by the server. print $real_server_sock ".\n"; #Grab all response lines the server has to give us. This approach doesn't care #If the server actually responds to all queries, we just take whatever answers come back, #hand them off to the client, and cache as appropriate. $/ = LF; while (my $response=<$real_server_sock>) { Debug 2, "Response received.\n"; $live_responses++; chomp $response; print $incoming_session "$response\n"; my $response_verdict; my $response_key; ($response_verdict, $response_key) = split(/\s+/, $response, 2); if ( ($response_verdict eq "Positive") || ($response_verdict eq "Negative") ) { my $recreated_query = "key:$response_key&action:lookup"; if ( not ( ($ignore_negative == 1) && ($response_verdict eq "Negative" ) ) ) { #Save the response in live array only if the response is non-negative or the user wants to cache all types. $responses{$recreated_query} = $response; } else { Debug 2, "Not using response in live query array.\n"; } #Save in cache file so we can load it in on future invocations. #Note - we save even negative responses even if the user specified --ignore_negative because #a future run of this program may want to load in previously seen negative responses. Any future #run that specifies --ignore_negative will simply not load Negative lines from the cache file. if (my $cache_fh = IO::File->new("$cache_file", O_WRONLY|O_APPEND)) { # |O_CREAT removed, security risk $cache_fh->autoflush(1); print $cache_fh "$recreated_query$field_separator$response\n"; close $cache_fh; Debug 2, "stored \"$recreated_query,$response\" in cache file: $cache_file.\n"; } elsif ($missing_cache_warned == 0) { #Warn once about missing or unwritable cache file. Debug 0, "Local cache file $cache_file unavailable or unwritable for storing new entries (create it if it's not there and check permissions, please): $!.\n"; $missing_cache_warned++; } } elsif ($response_verdict eq "Accepted") { my $recreated_query = "key:$response_key&action:report"; $responses{$recreated_query} = $response; #Create a simulated query to match future lookups for this reported key. my $simulated_positive_query = "key:$response_key&action:lookup"; my $simulated_positive_response = "Positive $response_key"; #We don't have to check for $ignore_negative here because all Accepted keys are Positive. $responses{$simulated_positive_query} = $simulated_positive_response; if (my $cache_fh = IO::File->new("$cache_file", O_WRONLY|O_APPEND)) { # |O_CREAT removed, security risk $cache_fh->autoflush(1); print $cache_fh "$recreated_query$field_separator$response\n"; print $cache_fh "$simulated_positive_query$field_separator$simulated_positive_response\n"; close $cache_fh; Debug 2, "stored \"$recreated_query,$response\" in cache file: $cache_file.\n"; Debug 2, "stored \"$simulated_positive_query,$simulated_positive_response\" in cache file: $cache_file.\n"; } elsif ($missing_cache_warned == 0) { #Warn once about missing or unwritable cache file. Debug 0, "Local cache file $cache_file unavailable or unwritable for storing new entries (create it if it's not there and check permissions, please): $!.\n"; $missing_cache_warned++; } } elsif ($response_verdict eq "Rejected") { my $recreated_query = "key:$response_key&action:report"; $responses{$recreated_query} = $response; #This wasn't accepted by the server, so we can't create a simulated Positive or Negative entry. if (my $cache_fh = IO::File->new("$cache_file", O_WRONLY|O_APPEND)) { # |O_CREAT removed, security risk $cache_fh->autoflush(1); print $cache_fh "$recreated_query$field_separator$response\n"; close $cache_fh; Debug 2, "stored \"$recreated_query,$response\" in cache file: $cache_file.\n"; } elsif ($missing_cache_warned == 0) { #Warn once about missing or unwritable cache file. Debug 0, "Local cache file $cache_file unavailable or unwritable for storing new entries (create it if it's not there and check permissions, please): $!.\n"; $missing_cache_warned++; } } elsif ($response eq "Unknown command.") { #Do nothing, we can't match it up with a command. Debug 2, "Ignoring \"Unknown command.\" response.\n"; } elsif ($response eq ".") { #Do nothing, we're connected to an echo server. Debug 2, "Ignoring \".\" response.\n"; } elsif ($response =~ /^key:/) { #Do nothing, we're connected to an echo server. Debug 2, "Ignoring \"key:\" response.\n"; } else { Debug 1, "$response_verdict not Positive or Negative in $response.\n"; } } #Note: The "." _appears_ to be an "end of requests" mark. If I knew how to test if #more data could be read from the client, I'd do so, allowing the client to #send more requests on the same connection after a ".". I don't, so we'll close #both client and server connections here. if ($incoming_session->connected) { close $incoming_session; } if ($real_server_sock->connected) { close $real_server_sock; } } elsif ($client_query =~ /action:report$/) { #Hand report action directly to server, do not cache at all. #FIXME - remove any lines with that hash from the local database. $client_requests++; Debug 1, "Send off $client_query.\n"; print $real_server_sock "$client_query\n"; } elsif (defined $responses{$client_query}) { #We have a cached entry for this already, hand it back to the user. $client_requests++; $cached_responses++; Debug 1, "Cached response to $client_query.\n"; print $incoming_session "$responses{$client_query}\n"; } else { #We have no cached entry - ask the real server. $client_requests++; Debug 1, "Live response to $client_query.\n"; print $real_server_sock "$client_query\n"; } } #The client waits for us to close the connection, so do so. Close server socket too. Debug 1, "Connection from [$peer_host,$peer_port] finished.\n"; if ($incoming_session->connected) { close $incoming_session; } if ($real_server_sock->connected) { close $real_server_sock; } } if ($verbosity >= 1) { if ($client_requests > 0) { my $cached_percent = (($cached_responses * 100 ) / $client_requests); printf STDERR ("%d client requests, %d (%.2f%%) served from cache, %d served live.\n", $client_requests, $cached_responses, $cached_percent, $live_responses); } else { printf STDERR ("%d client requests.\n", $client_requests); } } close $incoming_sock;