Over a year ago I wrote an article on how to stop spammers by adding them to your .htaccess file.
In the article I mention a Python script that does the work for me. Thanks to a comment on the article, I decided to publish the program here.
The Python program is called update-htaccess.py and uses a configuration file called update-htaccess.ini. It can do the following:
- It downloads the CSV list from Stop Forum Spam.
- It can download the Apache log from a server or use a local file. My provider offers two different logs, a monthly one and a current running daily one.
- It can download the .htaccess file or use a local file.
- It can upload the .htaccess file.
The program keeps a log file of what it does and at the end it will show a summary of what it did.
Example:
[2009/10/08 06:40:11] [INFO ] Starting update-htaccess.py
[2009/10/08 06:40:11] [INFO ] Retrieving Banned IP list from stopforumspam.com
[2009/10/08 06:40:13] [INFO ] Banned IP list retrieved
[2009/10/08 06:40:14] [INFO ] Retrieving Apache log forums.avirtualhome.com-Oct-2009.gz
[2009/10/08 06:40:16] [INFO ] Apache logfile forums.avirtualhome.com-Oct-2009.gz retrieved
[2009/10/08 06:40:16] [INFO ] Retrieving .htaccess file
[2009/10/08 06:40:18] [INFO ] .htaccess file retrieved
[2009/10/08 06:40:18] [INFO ] Processing Apache logfile
[2009/10/08 06:40:18] [INFO ] Apache logfile processed
[2009/10/08 06:40:18] [INFO ] Starting update .htaccess file
[2009/10/08 06:40:19] [INFO ] Update .htaccess file completed
[2009/10/08 06:40:20] [INFO ] Uploading .htaccess file
[2009/10/08 06:40:21] [INFO ] .htaccess file uploaded
[2009/10/08 06:40:21] [INFO ] Total IP scanned: 753 - Clean: 614 - Spam: 139
[2009/10/08 06:40:21] [INFO ] Total Banned: 1100 - New: 16 - Known: 123 - Removed: 0
[2009/10/08 06:40:21] [INFO ] update-htaccess.py finished
The program has the following requirements:
OS: Linux (I use a system command to copy a file)
Python >= 2.5
The Python modules: logging, urllib2, ConfigParser, gzip, ftplib, optparse
The module SFS, which you can download here
It uses a configuration file for all needed information.
You also need to add several lines to your .htaccess file; the program will insert the appropriate lines between those lines.
The program itself:
#!/usr/bin/python
import ConfigParser
import gzip
import logging
import logging.config
import os
import shutil
import sys
import urllib2
from ftplib import FTP
from optparse import OptionParser
from urllib2 import Request, urlopen, URLError

import sfs
def storFtpFile(ftp, ftp_dir, os_file, outfile=None):
    """Upload the local file ``os_file`` into ``ftp_dir`` on the FTP server.

    ftp     -- connected session object providing ``cwd`` and ``storbinary``.
    ftp_dir -- remote directory that is made current before the upload.
    os_file -- path of the local file to send.
    outfile -- name to store the file under on the server; when omitted the
               value of ``os_file`` is used as the remote name.
    """
    remote_name = os_file if outfile is None else outfile
    ftp.cwd(ftp_dir)
    # open() instead of the Python-2-only file() builtin; try/finally so the
    # handle is released even when the transfer fails.
    local = open(os_file, 'rb')
    try:
        ftp.storbinary('STOR ' + remote_name, local)
    finally:
        local.close()
def getFtpFile(ftp, ftp_dir, ftp_file, outfile=None):
    """Download ``ftp_file`` from ``ftp_dir`` on the FTP server.

    ftp      -- connected session object providing ``cwd``, ``nlst`` and
                ``retrbinary``.
    ftp_dir  -- remote directory that is made current before the download.
    ftp_file -- name of the remote file.
    outfile  -- local path to write to; defaults to ``ftp_file``.

    Raises SystemExit with a "Not found" message when ``ftp_file`` is not in
    the directory listing. The local file is only created after that check.
    """
    if outfile is None:
        outfile = ftp_file
    ftp.cwd(ftp_dir)
    if ftp_file not in ftp.nlst():
        # Same effect as the interactive exit() helper, without depending on
        # the site module having injected it.
        raise SystemExit('Not found %s' % (ftp_dir + ftp_file))
    local = open(outfile, 'wb')
    try:
        ftp.retrbinary('RETR ' + ftp_file, local.write)
    finally:
        # Close explicitly so the data is flushed before callers read the file.
        local.close()
def getStopForumSpammersList():
    """Download the banned IP list from stopforumspam.com.

    Returns a list with one string per comma-separated entry of the
    downloaded file, with surrounding whitespace removed and empty entries
    dropped.

    When the download fails the error is logged and printed and the program
    exits with status 1.
    """
    logger.info('Retrieving Banned IP list from stopforumspam.com')
    req = urllib2.Request('http://www.stopforumspam.com/downloads/bannedips.csv')
    try:
        response = urllib2.urlopen(req)
    except URLError:
        # sys.exc_info() instead of "except ..., e" / "except ... as e" keeps
        # this valid on every Python version from 2.5 on.
        e = sys.exc_info()[1]
        if hasattr(e, 'reason'):
            logger.error('Failure: %s' % (e.reason,))
            print('Failure:  %s' % (e.reason,))
        else:
            logger.error('Error: %s' % (e,))
            print('Error code:  %s' % (e,))
        # Always stop here: without a response there is nothing to parse, and
        # a non-zero status lets calling scripts notice the failure.
        sys.exit(1)
    try:
        spamips = response.read()
    finally:
        response.close()
    # Strip each entry so a trailing newline or stray blank does not defeat
    # the later "ip in list" comparisons.
    spamip = [entry.strip() for entry in spamips.split(",") if entry.strip()]
    logger.info('Banned IP list retrieved')
    return spamip
def getApacheLog(ftp):
    """Determine the Apache log file name and download it unless -l was given.

    The settings are read from the ini section named after the selected
    period ('dailylog' or 'monthlylog'). For the monthly period the
    placeholders %month% and %year% in the configured name are replaced.
    If no year was given on the command line, ``options.year`` is filled from
    the ini file.

    ftp -- FTP session passed on to getFtpFile; not used when the local log
           option is set.

    Returns a tuple (file name, compressed) where ``compressed`` is the
    configured extension or None when the log is not compressed.
    Exits with status 1 when the section has no ``log`` entry.
    """
    section = options.period + 'log'
    ftp_file = conf.getIniValue(section, 'log')
    # An empty "compressed=" entry means "not compressed", the same as a
    # missing one; otherwise the name would end in a bare '.'.
    compressed = conf.getIniValue(section, 'compressed') or None
    ftp_dir = conf.getIniValue(section, 'dir')
    if not ftp_file:
        logger.error('No log file name configured in section [%s]' % (section,))
        sys.exit(1)
    if not options.year:
        options.year = conf.getIniValue(section, 'year')
    if options.period == 'monthly':
        ftp_file = ftp_file.replace('%month%', options.month.capitalize())
        # Without a year the placeholder is left in place, so a later
        # "Not found" message shows what is missing.
        if options.year:
            ftp_file = ftp_file.replace('%year%', options.year)
    if compressed is not None:
        ftp_file = ftp_file + '.' + compressed
    if not options.locallog:
        logger.info('Retrieving Apache log %s' % (ftp_file))
        getFtpFile(ftp, ftp_dir, ftp_file)
        logger.info('Apache logfile %s retrieved' % (ftp_file))
    return (ftp_file, compressed)
def getHTAccessFile(ftp):
    """Fetch .htaccess from the directory configured under [htaccess] dir.

    The file is saved under the same name, relative to the current
    working directory.
    """
    logger.info('Retrieving .htaccess file')
    remote_dir = conf.getIniValue('htaccess', 'dir')
    getFtpFile(ftp, remote_dir, '.htaccess')
    logger.info('.htaccess file retrieved')
def putHTAccessFile(ftp):
    """Send the local .htaccess to the directory configured under [htaccess] dir."""
    logger.info('Uploading .htaccess file')
    remote_dir = conf.getIniValue('htaccess', 'dir')
    storFtpFile(ftp, remote_dir, '.htaccess')
    logger.info('.htaccess file uploaded')
class Config(object):
    """Thin wrapper around ConfigParser for reading the program's ini file."""

    def __init__(self, filename='update-htaccess.ini'):
        """Read the ini file.

        filename -- path of the ini file; defaults to 'update-htaccess.ini'
                    relative to the current working directory.
        """
        self.config = ConfigParser.ConfigParser()
        self.config.read(filename)

    def getIniValue(self, section, variable, default=None):
        """Return the value of ``variable`` in ``section``.

        ``default`` (None unless given) is returned when the section or the
        option does not exist.
        """
        if self.config.has_option(section, variable):
            return self.config.get(section, variable)
        return default
def _parseOptions():
    """Parse and validate the command line; return the optparse options."""
    usage = 'usage: %prog [options]'
    parser = OptionParser(usage)
    parser.set_defaults(period='daily')
    parser.add_option('-p', '--period', dest='period', help='Choose daily or monthly', default='daily')
    parser.add_option('-m', '--month', dest='month', help='Which month to retrieve')
    parser.add_option('-y', '--year', dest='year', help='Which year to retrieve')
    parser.add_option('-a', '--access', action="store_true", dest='localaccess', help='Use local copy of htaccess file')
    parser.add_option('-l', '--log', action="store_true", dest='locallog', help='Use local copy of log file')
    parser.add_option('-u', '--upload', action="store_true", dest='upload', help='Upload the htaccess file')
    (opts, args) = parser.parse_args()
    if opts.period not in ['daily', 'monthly']:
        parser.error('Only daily or monthly is allowed')
    if opts.period == 'monthly':
        if opts.month is None:
            parser.error('When using monthly, month has to be given as well')
        if opts.month.lower() not in ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                                      'jul', 'aug', 'sep', 'oct', 'nov', 'dec']:
            parser.error('Invalid month given')
    if opts.year:
        if not opts.year.isdigit():
            parser.error('Invalid year given')
    return opts


def _connectFtp():
    """Open an FTP session with the host/user/passwd from the [site] section."""
    ftp_user = conf.getIniValue('site', 'user')
    ftp_passwd = conf.getIniValue('site', 'passwd')
    ftp_host = conf.getIniValue('site', 'host')
    return FTP(ftp_host, ftp_user, ftp_passwd)


def _readLogIPs(log, compressed):
    """Return the sorted, unique client addresses found in the log file.

    The text before the first '-' of every line is taken as the address.
    ``compressed`` None means a plain file; anything else is read via gzip.
    """
    if compressed is None:
        log_file = open(log, 'rb')
    else:
        log_file = gzip.open(log)
    seen = set()  # set membership keeps de-duplication linear
    dots = False
    try:
        for number, line in enumerate(log_file):
            # Progress indicator: one dot per 50 lines.
            if number % 50 == 0:
                sys.stdout.write('. ')
                dots = True
            ip = line.split('-', 1)[0].strip()
            # Blank lines would otherwise yield an empty "address" that could
            # end up as a broken "deny from " line.
            if ip:
                seen.add(ip)
    finally:
        log_file.close()
    if dots:
        sys.stdout.write('\n')
    return sorted(seen)


def _updateHTAccess(ip_list, spamip):
    """Rewrite the local .htaccess with an updated "deny from" list.

    The current file is first copied to 'htaccess-old'. Every existing line
    starting with 'deny from' is collected; all other lines are written back
    unchanged, and the sorted deny list is inserted after the first
    '#Start DENY' line.

    For each address in ``ip_list``: if it is in ``spamip`` it is kept or
    added as a deny line; otherwise an existing deny line for it is removed.

    Returns (allip, cleanip, newbanned, oldbanned, removedip, totalbanned).
    Exits with status 1, leaving .htaccess untouched, when the '#Start DENY'
    marker is missing.
    """
    # shutil raises on failure, so a missing .htaccess can no longer lead to
    # a stale htaccess-old being processed.
    shutil.copyfile('.htaccess', 'htaccess-old')
    old_access_file = open('htaccess-old', 'r')
    try:
        old_access = old_access_file.readlines()
    finally:
        old_access_file.close()

    if not any(line.strip() == '#Start DENY' for line in old_access):
        logger.error("Marker '#Start DENY' not found in .htaccess, file left unchanged")
        sys.exit(1)

    # Deny lines are compared without their line ending so CRLF files and a
    # final line without a newline still match.
    old_deny = set(line.rstrip() for line in old_access if line[0:9] == 'deny from')
    new_deny = set(old_deny)
    spam = set(entry.strip() for entry in spamip)

    allip = 0
    cleanip = 0
    removedip = 0
    newbanned = 0
    oldbanned = 0
    for ip in ip_list:
        allip += 1
        entry = 'deny from ' + ip
        # Do online check using the two next line
        #iresult = sfs.checkIP(ip.strip())
        #if iresult["appears"]:
        # Use the CSV file for faster results
        if ip in spam:
            if entry in old_deny:
                print("Old banned IP found: %s" % (ip))
                oldbanned += 1
            else:
                print("New banned ip found: %s" % (ip))
                new_deny.add(entry)
                newbanned += 1
        else:
            if entry in old_deny:
                print("Removed banned IP: %s" % (ip))
                new_deny.discard(entry)
                removedip += 1
            print("Clean IP: %s" % (ip))
            cleanip += 1

    new_access_file = open('.htaccess', 'w')
    try:
        written = False
        for line in old_access:
            if line[0:9] != 'deny from':
                new_access_file.write(line)
            if not written and line.strip() == '#Start DENY':
                if not line.endswith('\n'):
                    new_access_file.write('\n')
                for deny in sorted(new_deny):
                    new_access_file.write(deny + '\n')
                written = True
    finally:
        new_access_file.close()
    return (allip, cleanip, newbanned, oldbanned, removedip, len(new_deny))


def main():
    """Run one update: fetch inputs, rewrite .htaccess, optionally upload it.

    Sets the module globals ``conf``, ``options`` and ``logger`` that the
    other functions of this script read.
    """
    global logger, conf, options
    conf = Config()
    options = _parseOptions()
    logging.config.fileConfig("update-htaccess.ini")
    logger = logging.getLogger("log02")
    logger.info('Starting update-htaccess.py')
    # Get the list from http://www.stopforumspam.com
    spamip = getStopForumSpammersList()
    # Only connect when something actually has to be downloaded.
    ftp = None
    if not (options.locallog and options.localaccess):
        ftp = _connectFtp()
    try:
        # Get the access log from FTP (or just its name when using a local log)
        log, compressed = getApacheLog(ftp)
        # Get .htaccess from FTP
        if not options.localaccess:
            getHTAccessFile(ftp)
    finally:
        #Log out the FTP session
        if ftp is not None:
            ftp.close()
    # The IP's from the access-log, sorted uniquely
    logger.info('Processing Apache logfile')
    ip_list = _readLogIPs(log, compressed)
    logger.info('Apache logfile processed')
    # Process .htaccess file
    logger.info('Starting update .htaccess file')
    (allip, cleanip, newbanned, oldbanned,
     removedip, totalbanned) = _updateHTAccess(ip_list, spamip)
    logger.info('Update .htaccess file completed')
    # Upload whenever the deny list changed, additions or removals alike.
    if options.upload and (newbanned > 0 or removedip > 0):
        ftp = _connectFtp()
        try:
            putHTAccessFile(ftp)
        finally:
            #Log out the FTP session
            ftp.close()
    # Print out result and send to logging for future reference.
    # Removed addresses are already counted as clean, so Spam is old + new.
    logger.info("Total IP scanned: %d - Clean: %d - Spam: %d" % (allip, cleanip, oldbanned + newbanned))
    logger.info("Total Banned: %d - New: %d - Known: %d - Removed: %d" % (totalbanned, newbanned, oldbanned, removedip))
    logger.info('update-htaccess.py finished')
    logging.shutdown()
# Run the update only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
The configuration file
[site]
host=
user=
passwd=
[dailylog]
dir=
log=
[monthlylog]
year=
dir=
log=
compressed=gz
[htaccess]
dir=
[loggers]
keys=root,file
[handlers]
keys=console,file
[formatters]
keys=console,file
[logger_root]
level=DEBUG
handlers=console
[logger_file]
level=DEBUG
qualname=log02
handlers=file
[handler_console]
class=StreamHandler
formatter=console
stream=sys.stderr
args=(sys.stderr,)
[handler_file]
class=FileHandler
level=DEBUG
formatter=file
filename=update-htaccess.log
args=('update-htaccess.log', 'a')
[formatter_console]
format=[%(levelname)s] %(message)s
datefmt=
[formatter_file]
format=[%(asctime)s] [%(levelname)-8s] %(message)s
datefmt=%Y/%m/%d %H:%M:%S
The lines you need to add in your .htaccess file
order allow,deny
#Start DENY
#End DENY
allow from all
A lot of this is aimed at my own needs and situation; you might have to change some things for it to work with your provider.
Please use my forum if you have questions about the program, suggestions, bug fixes, etc.
