#!/usr/bin/perl
#
# Convert William Stearns URI list to use fewer rules
#
# Original list can be found here:
#   http://www.stearns.org/sa-blacklist/
#
# Input is read from the files named on the command line, or from STDIN
# when none are given.  The converted rule set is written to STDOUT.
#
# Sites are taken from the "describe ... URI contains <site>" lines of the
# input, sorted, grouped by their first $NUMCHARS characters, and emitted
# as one alternation regex per group (at most $MAXPERRULE sites per rule).
#

use strict;
use warnings;

my $SCORE      = "";    # What score should these rules use?  Default is same as source.
my $MAXPERRULE = 50;    # How many sites per rule?
my $NUMCHARS   = 1;     # How many characters to optimize with?
my $RULENAME   = "";    # What should the rule name begin with?  Default is same as
                        # source with OPT_ appended.
                        # Sequence number will also be appended.

my %uri = ();           # site => number of times it was seen (keys give the unique set)

print "#\n";
print "# Converted William Stearns URI list to use less rules\n";
print "#\n";
print "# Original list can be found here:\n";
print "# http://www.stearns.org/sa-blacklist/\n";
print "#\n";

my $date = localtime();
print "#\n# Generated: $date\n#\n";

while (my $line = <>) {
    chomp $line;

    # Comment lines of the source list are passed through unchanged.
    print $line . "\n" if $line =~ m/^#/;

    # Collect every listed site.
    $uri{$1}++ if $line =~ m/^describe.*URI contains (.*)$/;

    # First describe line supplies the default rule-name prefix: everything
    # after "describe" up to (not including) the first digit.
    $RULENAME = $1 . "OPT_"
        if $RULENAME eq "" && $line =~ m/^describe\s+(\D+)/;

    # First score line supplies the default score.
    $SCORE = $1
        if $SCORE eq "" && $line =~ m/^score\s+\S+\s+([0-9.]+)$/;
}

my $break     = "";     # prefix (first $NUMCHARS chars) shared by the current rule
my $linecount = 0;      # sequence number appended to $RULENAME
my @suffixes  = ();     # regex-escaped site remainders collected for the current rule
my $lastsite  = "";     # most recently added site, unescaped

# Escape characters that are special inside a /.../ regex so that a site
# name is always matched literally.  Hyphens and word characters are left
# alone, so output for ordinary host names keeps the familiar "\." form.
sub _escape_regex {
    my ($text) = @_;
    $text =~ s/([\\\/.^\$*+?()\[\]{}|])/\\$1/g;
    return $text;
}

# Emit the describe/uri/score lines for the rule currently being collected
# in @suffixes.  Does nothing when no site has been collected.
sub ruleout {
    return if !@suffixes;

    my $pattern;
    if (@suffixes == 1) {
        # A lone site needs no alternation; escape it like any other site
        # so that "." cannot match an arbitrary character.
        $pattern = 'm/\b' . _escape_regex($lastsite) . '\b/i';
    }
    else {
        $pattern = 'm/\b' . _escape_regex($break)
                 . '(?:' . join('|', @suffixes) . ')\b/i';
    }

    print "describe $RULENAME$linecount\tURI contains sites starting with $break\n";
    print "uri $RULENAME$linecount\t$pattern\n";

    # Without a score in the input (and none configured) a value-less
    # score line would be invalid, so only the blank separator is printed.
    if ($SCORE ne "") {
        print "score $RULENAME$linecount\t$SCORE\n\n";
    }
    else {
        print "\n";
    }
    return;
}

foreach my $site (sort keys %uri) {
    # An empty site cannot form a useful pattern.
    next if $site eq "";

    my $key = substr($site, 0, $NUMCHARS);

    # Start a new rule when the prefix changes or the current rule is full.
    if ($break ne $key || @suffixes >= $MAXPERRULE) {
        if (@suffixes) {
            ruleout();
            $linecount++;
            @suffixes = ();
        }
        $break = $key;
    }

    # Sites no longer than the prefix contribute an empty alternative.
    my $shortsite = length($site) > $NUMCHARS ? substr($site, $NUMCHARS) : "";
    push @suffixes, _escape_regex($shortsite);
    $lastsite = $site;
}

ruleout() if @suffixes;