# config/e2guardian/e2guardian.conf

# e2guardian config file for version 5.3.1

# NOTE: This file is only read at start-up,
# but the lists defined in this file are re-read on reload or gentle restart,
# as are any rooms directory files.

# Language dir: where the language files are stored for internationalisation.
# The HTML template within this dir is only used when reportinglevel
# is set to 3.  When used, e2guardian displays the HTML file instead of
# using the perl cgi script.  This option is faster and cleaner, and makes
# the access denied page easier to customise.
# The language file itself is used regardless of that setting.
#
languagedir = '/usr/share/e2guardian/languages'

# Language to use from languagedir.
language = 'ukenglish'

#Debug Level
#Enables e2guardian debug output
#
#Single value:
#Eg
#  debuglevel = 'ICAP'
#Enables ICAP debug information only
#
#Eg
#  debuglevel = 'ALL'
#Enables ALL debug information
#
#Additive mode:
#Eg
#  debuglevel = 'ICAP,NET'
#Enables ICAP and NET debug information
#
#Subtractive mode:
#Eg
#  debuglevel = 'ALL,-ICAP'
#Enables all debug information except ICAP
#  debuglevel = 'ALL,-ICAP,-NET,-FILTER'
#Enables all debug information except ICAP, NET and FILTER
#
#Disabled by default; if this option is required just uncomment the line below.
#Also works with e2guardian -N (-N = do not go into the background).
#Possible values include: ICAP CLAMAV ICAPC (icap client)
#debuglevel = 'ALL'

#Directory for result of debug level (log)
#Works only if debuglevel is enabled
#
#debuglevelfile = '/data/log/e2guardian/debuge2'

# Logging Settings
#
# loglevel selects how much is logged:
#   0 = none
#   1 = just denied
#   2 = all text based
#   3 = all requests
loglevel = 3

# Log Exception Hits
# Log when an exception (user, ip, URL, phrase) is matched and the page
# is therefore let through.  Can be useful for diagnosing why a site
# gets through the filter.
# 0 = never log exceptions
# 1 = log exceptions, but do not explicitly mark them as such
# 2 = always log & mark exceptions (default)
logexceptionhits = 2

# Log File Format
# 1 = Dansguardian format (space delimited)
# 2 = CSV-style format
# 3 = Squid Log File Format
# 4 = Tab delimited
# 5 = Protex format: tab delimited, squid style format with extra fields
#     for filter block/result codes, reasons, filter group, and system name.
#     Used in arrays so that combined logs show the originating server.
# 6 = Protex format with the server (system name) field blanked.
#     Same format as 5; used in stand-alone systems.
logfileformat = 6

# Log a specific value from header
# lower case only
# only used with log formats 1, 5 and 6
#
# SECURITY: do not log 'proxy-authorization:'.  That header carries the
# client's proxy credentials (for Basic auth, a trivially decoded base64
# "user:password"), so enabling it writes reusable secrets into the access
# log - and, with dockermode = on, to stdout and whatever collects it.
# Left disabled; if a header must be logged, choose a non-secret one.
#logheadervalue = 'proxy-authorization:'

# Truncate large items in log lines
# Allowable values: 10 to 32000
# Default: 2000
# Unlimited is no longer allowed - 0 now selects the default of 2000
maxlogitemlength = 2000

# anonymize logs (blank out usernames & IPs)
#anonymizelogs = off

# Syslog logging
#
# Use syslog for access logging instead of logging to the file
# at the defined or built-in "loglocation"
#logsyslog = off

#Suffix to append to program name when logging through syslog
# Default is the e2Guardian instance number
#namesuffix = $z

# Log file location
#
# Defines the directory and filename of the access log.
loglocation = '/data/log/e2guardian/access.log'

# Dynamic statistics log file location
#
# Defines the dstats file directory and filename.
# Once every 'dstatinterval' seconds, stats on the number of threads in use,
# queue sizes and other useful information are written to this file.
# The format is similar to sar.  See notes/dstats_format for more details.
# Default is not to write stats.
dstatlocation = '/data/log/e2guardian/dstats.log'

# Interval in seconds between stats output
#   Minimum 10
#   Maximum 3600 (= 1 hour)
#   Default 300 (= 5 minutes)
dstatinterval = 300

# Stats time format
# By default timestamps are epoch time (GMT+0); with statshumanreadable
# set to on they are changed to the local time zone.
statshumanreadable = on

# Container mode
# When on, the process will not fork into the background AND logs to stdout.
# In this mode the systemd service is disabled!
# on | off
# NOTE(review): the default was left blank in the original comment - confirm
# against the e2guardian release in use (presumably off).
dockermode = on

# Network Settings
#
# The IP that e2guardian listens on.  If left blank e2guardian will
# listen on all IPs.  That would include all NICs, loopback, modem, etc.
# Normally you would have your firewall protecting this, but if you want
# you can limit it to a certain IP.  To bind to multiple interfaces,
# specify each IP on an individual filterip line.
# If mapportstoips is 'on' you can have the same IP twice so long as
# it has a different port.
filterip =

# The ports that e2guardian listens on.  Specify one line per filterip
# line.  If both mapportstoips and mapauthtoports are set to 'on'
# you can specify different authentication mechanisms per port, but
# only if the mechanisms can co-exist (e.g. basic/proxy auth can't).
filterports = 8080
#filterports = 8081

# Map ports to IPs
# If enabled map filterports to filterip - number of filterports must then be same as
# number of filterip
# If disabled will listen on all filterports on all filterips.
# on (default) | off
#mapportstoips = off

# Port for transparent https
# If defined, enables transparent https.
transparenthttpsport = 8443

# Port for ICAP
# If defined, enables ICAP mode.
icapport = 1344

# The IP of the upstream proxy - optional - if blank e2g will go direct to sites.
# Default is "", i.e. no proxy.
proxyip = 127.0.0.1

# The port on which e2guardian connects to the proxy.
proxyport = 3127

# Proxy timeout
# Sets the tcp timeout between the proxy and e2guardian.
# This is a connection timeout.
# If the proxy is remote you may need to increase this to 10 or more.
# Min 5 - Max 100
proxytimeout = 5

# Connect timeout
# Sets the tcp timeout between e2guardian and the upstream service
# (proxy or target host).
# This is a connection timeout.
# For remote sites you may need to increase this to 10 or more.
# Min 1 - Max 100
# Default 3
connecttimeout = 5

# Connect retries
# Set the number of retries to make on connection failure before giving up
# Min 1 - Max 100
# default 1
#connectretries = 1

# Proxy header exchange
# Sets the timeout between the proxy and e2guardian.
# Min 20 - Max 300
# If this is higher than the proxy's own timeout the user will get the
# proxy's Gateway error page; if lower, e2guardian's Gateway error page.
proxyexchange = 61

# Pconn timeout
# How long a persistent connection will wait for further requests.
# Squid apparently defaults to 1 minute (persistent_request_timeout),
# so wait slightly less than this to avoid duff pconns.
# Min 5 - Max 300
pcontimeout = 55

# Whether to retrieve the original destination IP in transparent proxy
# setups and check it against the domain pulled from the HTTP headers.
#
# Be aware that when visiting sites which use a certain type of round-robin
# DNS for load balancing, DG may mark requests as invalid unless DG gets
# exactly the same answers to its DNS requests as clients.  The chances of
# this happening can be increased if all clients and servers on the same LAN
# make use of a local, caching DNS server instead of using upstream DNS
# directly.
#
# See http://www.kb.cert.org/vuls/id/435052
# on (default) | off
#!! Not compiled !! originalip = off

# Banned image replacement
# Images that are banned for domain/url/etc reasons, including those
# in the adverts blacklists, can be replaced by an image.  This will,
# for example, hide images from advert sites and remove broken image
# icons from banned domains.
# The replacement image is given by custombannedimagefile.
# on (default) | off
usecustombannedimage = on
custombannedimagefile = '/usr/share/e2guardian/transparent1x1.gif'

# Banned flash replacement
# NOTE(review): presumably the flash counterpart of the banned image
# replacement - confirm against the e2guardian documentation.
usecustombannedflash = on
custombannedflashfile = '/usr/share/e2guardian/blockedflash.swf'

# Filter groups options
# filtergroups sets the number of filter groups.  A filter group is a set of
# content filtering options you can apply to a group of users.  The value must
# be 1 or more.  e2guardian will automatically look for e2guardianfN.conf where
# N is the filter group.  To assign users to groups use the filtergroupslist
# option.  All users default to filter group 1.  You must have some sort of
# authentication to be able to map users to a group.
filtergroups = 1
filtergroupslist = '/etc/e2guardian/lists/filtergroupslist'

# default filtergroup for standard (explicit) mode
# optional - defaults to 1
#defaultfiltergroup = 1

# default filtergroup for transparent proxy mode
# optional - defaults to 1
#defaulttransparentfiltergroup = 1

# default filtergroup for ICAP mode
# optional - defaults to 1
#defaulticapfiltergroup = 1

# If on, a user without a group is treated as unauthenticated and
# e2guardian tries the next plugin.
# If off, the user is connected with group 1.
# Defaults to off
# authrequiresuserandgroup = off

# Authentication files location
# These checks are now handled by pre-authstoryboard logic, but the lists
# themselves are defined here.
#
# bannediplist is ONLY for banned client IPs
iplist = 'name=bannedclient,messageno=100,logmessageno=103,path=/etc/e2guardian/lists/bannediplist'
# Put client DNS names in bannedclientlist if required
#sitelist = 'name=bannedclient,messageno=100,logmessageno=104,path=/etc/e2guardian/lists/bannedclientlist'
# exceptioniplist is ONLY for exception client IPs
iplist = 'name=exceptionclient,messageno=600,path=/etc/e2guardian/lists/exceptioniplist'
# Put client DNS names in exceptionclientlist if required
#sitelist = 'name=exceptionclient,messageno=631,path=/etc/e2guardian/lists/exceptionclientlist'

# authexception lists are for exception sites/urls allowed before authentication,
# to allow for machines to update without user authentication.
iplist = 'name=authexception,messageno=602,path=/etc/e2guardian/lists/authexceptioniplist'
sitelist = 'name=authexception,messageno=602,path=/etc/e2guardian/lists/authexceptionsitelist'
urllist = 'name=authexception,messageno=603,path=/etc/e2guardian/lists/authexceptionurllist'

#Note: only iplist, sitelist, ipsitelist and urllist can currently be defined for use with pre-authstoryboard.

# Per-Room definition directory
# A directory containing text files, each containing the room's name followed
# by IPs or ranges, and optionally site and url lists.
# Think of it as bannediplist and/or exceptions on crack
# perroomdirectory = '/etc/e2guardian/lists/rooms/'

# Show weighted phrases found
# If enabled, the phrases found that made up the total which exceeds
# the naughtiness limit will be logged and, if the reporting level is
# high enough, reported.  on | off
showweightedfound = on

# Weighted phrase mode
# There are 3 possible modes of operation:
# 0 = off = do not use the weighted phrase feature.
# 1 = on, normal = normal weighted phrase operation.
# 2 = on, singular = each weighted phrase found only counts once on a page.
#
# IMPORTANT: Setting this to "0" turns off ALL features which extract
# phrases from page content - banned & exception phrases (not just
# weighted), search term filtering, and scanning for links to banned URLs.
#
weightedphrasemode = 2

# Smart, Raw and Meta/Title phrase content filtering options
# Smart: multiple spaces and HTML are removed before phrase filtering.
# Raw: the raw HTML, including meta tags, is phrase filtered.
# Meta/Title: only meta and title tags are phrase filtered (v. quick).
# CPU usage can be effectively halved by using setting 0 or 1 compared to 2.
# 0 = raw only
# 1 = smart only
# 2 = both of the above (default)
# 3 = meta/title
phrasefiltermode = 2

# Lower casing options
# When a document is scanned, uppercase letters are converted to lower case
# in order to compare them with the phrases.  However this can break Big5 and
# other 16-bit texts.  If needed, preserve the case.  As of version 2.7.0
# accented characters are supported.
# 0 = force lower case (default)
# 1 = do not change case
# 2 = scan first in lower case, then in original case
preservecase = 0

# Note:
# If phrasefiltermode and preserve case are both 2, this equates to 4 phrase
# filtering passes. If you have a large enough userbase for this to be a
# worry, and need to filter pages in exotic character encodings, it may be
# better to run two instances on separate servers: one with preservecase 1
# (and possibly forcequicksearch 1) and non ASCII/UTF-8 phrase lists, and one
# with preservecase 0 and ASCII/UTF-8 lists.

# Hex decoding options
# When a document is scanned, %XX sequences can optionally be converted to
# chars.  Enable this if you find documents are getting past the phrase
# filtering due to encoding.  However this can break Big5 and other 16-bit
# texts.
# off = disabled (default)
# on = enabled
hexdecodecontent = off

# Force Quick Search rather than DFA search algorithm
# The current DFA implementation is not totally 16-bit character compatible,
# but is used by default as it handles large phrase lists much faster.
# Enable this option if you wish to use a large number of 16-bit character
# phrases.
# off (default) | on (Big5 compatible)
forcequicksearch = off

# Reverse lookups for banned sites and URLs.
# If set to on, e2guardian will look up the forward DNS for an IP URL
# address and search for both in the banned site and URL lists.  This
# prevents a user from simply entering the IP of a banned address.
# It will reduce searching speed somewhat, so unless you have a local caching
# DNS server, leave it off and use the Blanket IP Block option in the
# f1.story file instead.
reverseaddresslookups = off

# Reverse lookups for banned and exception IP lists.
# If set to on, e2guardian will look up the forward DNS for the IP
# of the connecting computer.
# If a client computer is matched against an IP given in the lists, then the
# IP will be recorded in any log entries; if forward DNS is successful and a
# match occurs against a hostname, the hostname will be logged instead.
# It will reduce searching speed somewhat, so unless you have a local DNS
# server, leave it off.
reverseclientiplookups = off

# Perform reverse lookups on client IPs for successful requests.
# If set to on, e2guardian will look up the forward DNS for the IP
# of the connecting computer, and log host names (where available) rather
# than IPs against requests.
# This does not depend on reverseclientiplookups being enabled; however, if
# it is, enabling this option does not incur any additional forward DNS
# requests.
logclienthostnames = off

# Max content filter size
# Sometimes web servers label binary files as text.  These can be very
# large, which causes a huge drain on memory and cpu resources.
# To counter this, you can limit the size of document that is filtered;
# anything larger is just passed straight through.
# This setting also applies to content regular expression modification.
# The value must not be higher than maxcontentramcachescansize.
# Do not set this too low, as pages that contain a long preamble will
# then not be content filtered.
# The size is in kibibytes - eg 2048 = 2 MiB
# Use 0 to set it to maxcontentramcachescansize.
maxcontentfiltersize = 1024

# Max content ram cache scan size
# This is only used if you use a content scanner plugin such as AV.
# This is the max size of file that e2g will download and cache
# in RAM.  After this limit is reached it will cache to disk.
# This value must be less than or equal to maxcontentfilecachescansize.
# The size is in kibibytes - eg 10240 = 10 MiB
# Use 0 to set it to maxcontentfilecachescansize.
# This option may be ignored by the configured download manager.
maxcontentramcachescansize = 2000

# Max content file cache scan size
# This is only used if you use a content scanner plugin such as AV.
# This is the max size of file that e2g will download
# so that it can be scanned or virus checked.
# This value must be greater than or equal to maxcontentramcachescansize.
# The size is in kibibytes - eg 10240 = 10 MiB
maxcontentfilecachescansize = 20000

# File cache dir
# Where e2g will download files to be scanned if they are too large for
# the RAM cache.
filecachedir = '/tmp'

# Delete file cache after user completes download
# When a file gets saved to temp it stays there until it is deleted.
# You can choose to have the file deleted when the user makes a successful
# download.  This means that if they click on the link to download from
# the temp store a second time they will get a 404 error.
# You should configure something to delete old files in temp to stop it
# filling up.
# on|off (defaults to on)
deletedownloadedtempfiles = on

# Initial Trickle delay
# The number of seconds a browser connection is left waiting before first
# being sent *something* to keep it alive.  The *something* depends on the
# download manager chosen.
# Do not choose too low a value or normal web pages will be affected.
# A value between 20 and 110 would be sensible.
# This may be ignored by the configured download manager.
initialtrickledelay = 20

# Trickle delay
# The number of seconds a browser connection is left waiting before being
# sent more *something* to keep it alive.  The *something* depends on the
# download manager chosen.
# This may be ignored by the configured download manager.
trickledelay = 10

# Download Managers
# These handle downloads of files to be filtered and scanned.
# They differ in the method they deal with large downloads.
# Files usually need to be downloaded 100% before they can be
# filtered and scanned before being sent on to the browser.
# Normally the browser can just wait, but with content scanning,
# for example to AV, the browser may timeout or the user may get
# confused so the download manager has to do some sort of
# 'keep alive'.
#
# There are various methods possible but not all are included.
# The author does not have the time to write them all so I have
# included a plugin system.  Also, not all methods work with all
# browsers and clients.  Specifically some fancy methods don't
# work with software that downloads updates.  To solve this,
# each plugin can support a regular expression for matching
# the client's user-agent string, and lists of the mime types
# and extensions it should manage.
#
# Note that these are the matching methods provided by the base plugin
# code, and individual plugins may override or add to them.
# See the individual plugin conf files for supported options.
#
# The plugins are matched in the order you specify and the last
# one is forced to match as the default, regardless of user agent
# and other matching mechanisms.
#
# NOTE: only the default download manager is supported in v5.
downloadmanager = '/etc/e2guardian/downloadmanagers/default.conf'

# Content Scanners (Also known as AV scanners)
# These are plugins that scan the content of all files your browser fetches
# for example to AV scan.  You can have more than one content
# scanner. The plugins are run in the order you specify.
# This is one of the few places you can have multiple options of the same name.
#
# Some of the scanner(s) require 3rd party software and libraries eg clamav.
# See the individual plugin conf file for more options (if any).
#
#contentscanner = '/etc/e2guardian/contentscanners/clamdscan.conf'
#contentscanner = '/etc/e2guardian/contentscanners/icapscan.conf'
#contentscanner = '/etc/e2guardian/contentscanners/commandlinescan.conf'

# Content scanner timeout
# Some of the content scanners support a timeout value to stop
# processing (eg AV scanning) a file if it takes too long.
# Where supported, this value will be used.
# The default of 60 seconds is probably reasonable.
contentscannertimeout = 60

# Content scan exceptions // THIS MOVED to e2guardianf1.conf
# contentscanexceptions = off

# Auth plugins
# 
# Handle the extraction of client usernames from various sources, such as
# Proxy-Authorization headers and ident servers, enabling requests to be
# handled according to the settings of the user's filter group.
#
# If you do not use multiple filter groups, you need not specify this option.
#
#authplugin = '/etc/e2guardian/authplugins/proxy-basic.conf'
#authplugin = '/etc/e2guardian/authplugins/proxy-digest.conf'
#authplugin = '/etc/e2guardian/authplugins/proxy-ntlm.conf'
#authplugin = '/etc/e2guardian/authplugins/ident.conf'
#authplugin = '/etc/e2guardian/authplugins/ip.conf'
#authplugin = '/etc/e2guardian/authplugins/proxy-header.conf'
#authplugin = '/etc/e2guardian/authplugins/port.conf'

# Map auth to ports
# If enabled map auth plugins to ips/ports - number of authplugins must then be same as
# number of ports
# If disabled scan authplugins on all ports - number of authplugins can then be different
#  to number of ports
# on (default) | off
#mapauthtoports = off

# Re-check replaced URLs
# As a matter of course, URLs undergo regular expression search/replace
# (urlregexplist) *after* checking the exception site/URL/regexpURL lists, but
# *before* checking against the banned site/URL lists.  This allows certain
# requests that would match the banned lists in their original state to be
# effectively converted into grey requests.
# With this option enabled, the exception site/URL/regexpURL lists are also
# re-checked after replacement, making it possible for URL replacement to
# trigger exceptions based on them.
# Defaults to off.
recheckreplacedurls = off

# Misc settings

# If on, adds an X-Forwarded-For: <clientip> header to the HTTP request.
# This may help solve some problem sites that need to know the
# source ip.  on | off
forwardedfor = on

# If on, uses the X-Forwarded-For: <clientip> header to determine the client
# IP.  This is for when you have squid between the clients and e2guardian.
# Warning - headers are easily spoofed.  on | off
# NOTE(review): this is on while no xforwardedforfilterip is set below, so
# the header appears to be accepted from any source - confirm that only a
# trusted front-end proxy can reach e2guardian, or restrict it below.
usexforwardedfor = on

# As mentioned above, the headers can be easily spoofed in order to fake the
# request origin by setting the X-Forwarded-For header.  If you have the
# "usexforwardedfor" option enabled, you may want to specify the IPs from which
# this kind of header is allowed, such as another upstream proxy server for
# instance.  If you want to authorize multiple IPs, specify each one on an
# individual xforwardedforfilterip line.
# xforwardedforfilterip =

# If on, logs some debug info regarding accept()ing and failed connections,
# which can usually be ignored.  These are logged by syslog.  It is safe to
# leave it on or off.
logconnectionhandlingerrors = on

# Number of worker threads to use
#
# This figure is the maximum number of concurrent connections.
# If more connections are made, connections will queue until a worker thread is free.
# On large sites you might want to try 5000 (max value 20000)
httpworkers = 500

# Process options
# (Change these only if you really know what you are doing).
# These options allow you to run multiple instances of e2guardian on a single machine.
# Remember to edit the log file path above also if that is your intention.

# PID filename
# 
# Defines process id directory and filename.
#pidfilename = '/var/run/e2guardian.pid'

# Disable daemonising
# If enabled, the process will not fork into the background.
# It is not usually advantageous to do this.
# on|off (defaults to off)
# NOTE(review): dockermode = on (set earlier in this file) is also documented
# as not forking into the background - confirm how the two settings interact.
nodaemon = off

# Disable the logging process
# on|off (defaults to off)
nologger = off

# Enable logging of blocks in the "ADs" category
# on|off (defaults to off)
logadblocks = off

# Enable logging of the client User-Agent
# Some browsers will add a *lot* of extra information to each log line!
# on|off (defaults to off)
loguseragent = off

# Daemon runas user and group
# This is the user that e2guardian runs as.  Normally the user/group nobody.
# Uncomment to use.  Defaults to the user set at compile time.
# Temp files created during virus scanning are given owner and group read
# permissions; to use content scanners based on external processes, such as
# clamdscan, the two processes must run with either the same group or user ID.
#daemonuser = 'e2guardian'
#daemongroup = 'e2guardian'


# Mail program
# Path to a (sendmail-compatible) email program, with options.
# Not used if usesmtp is disabled (filtergroup specific).
#mailer = '/usr/sbin/sendmail -t'   # NOT YET IMPLEMENTED

# Enable SSL support
# This must be on to enable MITM and/or Cert checking.
# Default is off.
enablessl = off

#SSL certificate checking path
#Path to CA certificates used to validate the certificates of https sites.
# if left blank openssl default ca certificate bundle will be used
#Leave as default unless you want to load non-default cert bundle
#sslcertificatepath = ''

#SSL man in the middle
#CA certificate path
#Path to the CA certificate to use as a signing certificate for 
#generated certificates.
# default is blank - required if ssl_mitm is enabled.
#cacertificatepath = '/home/e2/e2install/ca.pem'

#CA private key path
#path to the private key that matches the public key in the CA certificate.
# default is blank - required if ssl_mitm is enabled.
#caprivatekeypath = '/home/e2/e2install/ca.key'

#Cert private key path
#The public / private key pair used by all generated certificates
# default is blank - required if ssl_mitm is enabled.
#certprivatekeypath = '/home/e2/e2install/cert.key'

#Generated cert path
#The location where generated certificates will be saved for future use.
#(must be writable by the dg user)
# default is blank - required if ssl_mitm is enabled.
#generatedcertpath = '/home/e2/e2install/generatedcerts/'

#Warning: if you change the cert start/end time from default on a running 
#         system you will need to clear the generated certificate 
#         store and also may get problems on running client browsers

#Generated cert start time (in unix time) - optional
# defaults to 1417872951 = 6th Dec 2014
# generatedcertstart = 1417872951 

#Generated cert end time (in unix time) - optional
# defaults to generatedcertstart + 10 years
#generatedcertend =

# monitor helper path 
# If defined this script/binary will be called with start or stop appended as follows:-
# Note change in V4!!! - No longer detects cache failure
# At start after e2guardian has started listener and worker threads with 
# ' start' appended
# When e2guardian is stopping with ' stop' appended
# monitorhelper = '/usr/local/bin/mymonitor'  

# monitor flag prefix path 
# If defined path will be used to generate flag files as follows:-
# 
# At start after e2guardian has started listener and worker threads with 
#  'running' appended
# When e2guardian is stopping with 'paused' appended
# Note change in V4!!! - No longer detects cache failure
# monitorflagprefix = '/home/e2g/run/e2g_flag_'

# Much logic has moved to storyboard files.
# preauthstoryboard is the path of the pre-authentication storyboard file.
preauthstoryboard = '/etc/e2guardian/preauth.story'

# Storyboard tracing
# Warning - produces verbose output - do not use in production
# Output goes to syslog (or stderr in debug mode)
# default off
# storyboardtrace = off

# Abort if list is missing or unreadable
# default is to warn but then ignore missing lists
# To abort on missing list set to on
# abortiflistmissing = off  //NOT YET IMPLEMENTED

# Search sitelist for ip sites
# In v5 a separate set of lists has been introduced for IP sites
# and normally e2g will no longer check site lists for IPs.
# If you want to keep backward list compatibility then set this to
# 'on' - but note this incurs an overhead - putting IPs in ipsitelists
# and setting this to off gives the fastest implementation.
# default is 'on'
searchsitelistforip = on


# http header checking settings
#
# Limit number of http header lines in a request/response
# (to guard against attacks)
# Minimum 10 max 250
# default 50
# maxheaderlines = 50