Bogofilter FAQ

Unsure · knows

    bogofilter -s < spam.mbox
    bogofilter -n < ham.mbox
    bogominitrain.pl -fnv ~/.bogofilter ham.mbox spam.mbox '-o 0.9,0.3'
    randomtrain -s spam.mbox -n ham.mbox
    #! /bin/sh
    #  class3 -- classify one message as bad, good or unsure
    #
    #  Reads a single message on stdin, runs it through bogofilter with the
    #  arguments given to this script, and appends the message to
    #  corpus.bad, corpus.good or corpus.unsure (in the current directory)
    #  when bogofilter exits with status 0, 1 or 2 respectively.
    msg=msg.$$
    cat >"$msg"
    # "$@" (not $*) keeps every option intact, even one containing spaces.
    bogofilter "$@" <"$msg"
    res=$?
    case $res in
        0) cat "$msg" >>corpus.bad ;;
        1) cat "$msg" >>corpus.good ;;
        2) cat "$msg" >>corpus.unsure ;;
        # Any other status: the message is not filed; say so instead of
        # dropping it silently.
        *) echo "class3: bogofilter exited with status $res; message not filed" >&2 ;;
    esac
    rm "$msg"
    #! /bin/sh
    # classify -- put all messages in mbox through class3
    #
    # usage: classify MBOX [bogofilter options]
    # The first argument is the mbox to read; every remaining argument is
    # handed on to class3 for each message.
    src=$1
    shift
    # Both expansions are quoted so that an mbox path or an option that
    # contains spaces is not split into several words.
    formail -s class3 "$@" <"$src"
    classify spam.mbox [bogofilter options]
    bogofilter -s < corpus.good
    rm -f corpus.*
    classify ham.mbox [bogofilter options]
    bogofilter -n < corpus.bad
    rm -f corpus.*
    bogofilter -M -s -I ~/mail/Spam
    bogofilter -M -n -I ~/mail/NonSpam
    bogofilter -s -B ~/Maildir/.Spam
    bogofilter -n -B ~/Maildir/.NonSpam
    bogofilter -M -Ns -I ~/mail/Missed_Spam
    bogofilter -M -Sn -I ~/mail/False_Spam
    bogofilter -s -B ~/Maildir/.Missed_Spam
    bogofilter -n -B ~/Maildir/.False_Spam
    mailtool copy /full/path/to/mail.mbox '#driver.unix//full/path/to/mbox'
    for MSG in /full/path/to/maildir/* ; do 
        formail -I Status: < "$MSG" >> /full/path/to/mbox
    done
    X-Bogosity: Ham, tests=bogofilter, spamicity=0.500000
    X-Bogosity: Ham, tests=bogofilter, spamicity=0.500000
      int  cnt    prob   spamicity  histogram
     0.00   29  0.000209  0.000052  #############################
     0.10    2  0.179065  0.003425  ##
     0.20    2  0.276880  0.008870  ##
     0.30   18  0.363295  0.069245  ##################
     0.40    0  0.000000  0.069245
     0.50    0  0.000000  0.069245
     0.60   37  0.667823  0.257307  #####################################
     0.70    5  0.767436  0.278892  #####
     0.80   13  0.836789  0.334980  #############
     0.90   32  0.984903  0.499835  ################################
    X-Bogosity: Ham, tests=bogofilter, spamicity=0.500000
                          n    pgood     pbad      fw     U
    "which"              10  0.208333  0.000000  0.000041 +
    "own"                 7  0.145833  0.000000  0.000059 +
    "having"              6  0.125000  0.000000  0.000069 +
    ...
    "unsubscribe.asp"     2  0.000000  0.095238  0.999708 +
    "million"             4  0.000000  0.190476  0.999854 +
    "copy"                5  0.000000  0.238095  0.999883 +
    N_P_Q_S_s_x_md      138  0.00e+00  0.00e+00  5.00e-01
                             1.00e-03  4.15e-01  0.100
    #### CUTOFF Values
    #
    #    both ham_cutoff and spam_cutoff are allowed.
    #    setting ham_cutoff to a non-zero value will
    #    enable tri-state results (Spam/Ham/Unsure).
    #
    #ham_cutoff  = 0.45
    #spam_cutoff = 0.99
    #
    #    for two-state classification:
    #
    ## ham_cutoff = 0.00
    ## spam_cutoff= 0.99
    ## spamicity_tags = Yes, No, Unsure
    if header contains "X-Bogosity: Spam", put in Spam folder
    if header contains "X-Bogosity: Unsure", put in Unsure folder
    #### SPAM_SUBJECT_TAG
    #
    #    tag added to "Subject: " line for identifying spam or unsure
    #    default is to add nothing.
    #
    ##spam_subject_tag=***SPAM***
    ##unsure_subject_tag=???UNSURE???
    if subject contains "***SPAM***", put in Spam folder
    if subject contains "???UNSURE???", put in Unsure folder
    BOGOFILTER     = "/usr/bin/bogofilter"
    BOGOFILTER_DIR = "training"
    SPAMASSASSIN  = "/usr/bin/spamassassin"

    :0 HBc
    * ? $SPAMASSASSIN -e
    #spam yields non-zero
    #non-spam yields zero
    | $BOGOFILTER -n -d $BOGOFILTER_DIR
    #else (E)
    :0Ec
    | $BOGOFILTER -s -d $BOGOFILTER_DIR

    :0fw
    | $BOGOFILTER -p -e

    :0:
    * ^X-Bogosity:.Spam
    spam

    :0:
    * ^X-Bogosity:.Ham
    non-spam
    ## Silently drop all Asian language mail
    UNREADABLE='[^?"]*big5|iso-2022-jp|ISO-2022-KR|euc-kr|gb2312|ks_c_5601-1987'
    :0:
    * 1^0 $ ^Subject:.*=\?($UNREADABLE)
    * 1^0 $ ^Content-Type:.*charset="?($UNREADABLE)
    spam-unreadable

    :0:
    * ^Content-Type:.*multipart
    * B ?? $ ^Content-Type:.*^?.*charset="?($UNREADABLE)
    spam-unreadable
    bf_compact ~/.bogofilter wordlist.db
    cd ~/.bogofilter
    bogoutil -d wordlist.db | bogoutil -l wordlist.db.new
    mv wordlist.db wordlist.db.prv
    mv wordlist.db.new wordlist.db
    wordlist R,user,~/wordlist.db,1
    wordlist R,system,/var/spool/bogofilter/wordlist.db,1
    wordlist R,user,~/wordlist.db,2
    wordlist R,system,/var/spool/bogofilter/wordlist.db,3
    wordlist R,user,~/wordlist.db,5
    wordlist R,system,/var/spool/bogofilter/wordlist.db,4
    wordlist I,ignore,~/ignorelist.db,7
    wordlist R,system,/var/spool/bogofilter/wordlist.db,8
  echo ignore.me | bogoutil -l ~/ignorelist.db
    db_verify wordlist.db
    bogoutil -d wordlist.db | bogoutil -l wordlist.new.db
    db_dump -r wordlist.db > wordlist.txt
    db_load wordlist.new.db < wordlist.txt
    bogoutil -d wordlist.db > wordlist.txt
    bogoutil -l wordlist.db.new < wordlist.txt
    bogoutil -d wordlist.db > wordlist.raw.txt
    iconv -f iso-8859-1 -t utf-8 < wordlist.raw.txt > wordlist.utf8.txt
    bogoutil -l wordlist.db.new < wordlist.utf8.txt
    bogoutil --unicode=yes -m wordlist.db
    bogoutil -d wordlist.db > wordlist.utf8.txt
    iconv -f utf-8  -t iso-8859-1 < wordlist.utf8.txt > wordlist.raw.txt
    bogoutil -l wordlist.db.new < wordlist.raw.txt
    bogoutil --unicode=no -m wordlist.db
    cd ~/.bogofilter
    bogoutil -d wordlist.db > wordlist.txt
    mv wordlist.db wordlist.db.old
    bogoutil --db-transaction=yes -l wordlist.db < wordlist.txt
    rm wordlist.db.old wordlist.txt
    cd ~/.bogofilter
    bogoutil -d wordlist.db > wordlist.txt
    mv wordlist.db wordlist.db.old
    rm -f log.?????????? __db.???
    bogoutil --db-transaction=no -l wordlist.db < wordlist.txt
  bogoutil --db-recover /your/bogofilter/directory
    ls -lh $BOGOFILTER_DIR/wordlist.db
    postconf | grep mailbox_size_limit
    postconf -e mailbox_size_limit=73000000
    bogoutil -d wordlist.db | \
    awk '{print $1 " " $2 " 0"}' | grep -v " 0 0" | \
    bogoutil -l wordlist.new.db
    bogoutil -d wordlist.db | \
    awk '{print $1 " 0 " $3}' | grep -v " 0 0" | \
    bogoutil -l wordlist.new.db
    $ cd build_unix
    $ sh ../dist/configure
    $ make
    # make install
    $ ./configure --with-libdb-prefix=/usr/local/BerkeleyDB.4.4
    $ make
    # make install-strip
    $ LD_LIBRARY_PATH=/usr/lib:/usr/local/lib:/usr/local/BerkeleyDB.4.4/lib
    $ export LD_LIBRARY_PATH
    pkg install -y bogofilter
    pkg install -y portmaster
    portmaster mail/bogofilter
    macro index S "|bogofilter -s\ns=junkmail"  "Learn as spam and save to junk"
    macro pager S "|bogofilter -s\ns=junkmail"  "Learn as spam and save to junk"
    macro index H "|bogofilter -n\ns="          "Learn as ham and save"
    macro pager H "|bogofilter -n\ns="          "Learn as ham and save"
    condition:
    * test "bogofilter < %F"
    action:
    * move "#mh/YOUR_SPAM_BOX"
    Mark as ham / spam:
    * bogofilter -n -v -B "%f" (mark ham)
    * bogofilter -s -v -B "%f" (mark spam)
    #!/bin/sh
    # Feed bogofilter the files that appeared in the listed folders since
    # the previous run of this script.
    CONFIGDIR=~/.bogofilter
    SPAMDIRS="$CONFIGDIR/spamdirs"     # file naming the folders to scan
    MARKFILE="$CONFIGDIR/lastbogorun"  # its timestamp marks the previous run
    # The folder list is split on whitespace, so entries must not contain
    # blanks.  find prints regular files newer than the marker, skipping
    # names that start with ".sylpheed"; the combined list is piped to
    # bogofilter.
    for folder in $(cat "$SPAMDIRS"); do
        find "$folder" -type f -newer "$MARKFILE" ! -name ".sylpheed*"
    done | bogofilter -bNsv
    # Move the marker forward for the next run.
    touch "$MARKFILE"
    Condition:
        header "X-Bogosity" matchcase "Spam"
    Action:
        move "#mh/Mailbox/Spam"
    Condition:
        header "X-Bogosity" matchcase "Unsure"
    Action:
        move "#mh/Mailbox/Unsure"
    Register Spam:
        bogofilter -s < "%f"

    Register Ham:
        bogofilter -n < "%f"

    Unregister Spam:
        bogofilter -S < "%f"

    Unregister Ham:
        bogofilter -N < "%f"
    BogoTest -vv:
        bogofilter -vv < "%f"

    BogoTest -vvv:
        bogofilter -vvv < "%f"
;; load bogofilter capabilities (spam)
;;
(require 'vm-bogofilter)

;; short-key for bogofilter
;; K (shift-k) means spam message
;; C (shift-c) means ham message
(define-key vm-mode-map "K" 'vm-bogofilter-is-spam)
(define-key vm-mode-map "C" 'vm-bogofilter-is-clean)

Bogofilter FAQ

Typographic conventions

Frequently asked questions and their answers

What is bogofilter?

Bogo-what?

How does bogofilter work?

Mailing Lists

How do I start my bogofilter training?

Comparing these methods

How do I train using maildirs?

Initial training from mbox:

Initial training from maildir:

Corrective training from mbox:

Corrective training from maildir:

How can I keep the scoring accuracy high?

What mailbox (file) formats does bogofilter understand?

What does bogofilter's verbose output mean?

What is Unsure mode?

What are "training on error" and "training to exhaustion"?

What does the '-u' (autoupdate) switch do?

How can I use SpamAssassin to train Bogofilter?

What can I do about Asian spam?

How can I compact my database?

How do I manually query the database?

Can I use multiple wordlists?

Can I tell bogofilter to ignore certain tokens?

How do I upgrade from separate word databases to the combined wordlist format?

How can I tell if my wordlists are corrupted?

How can I convert my wordlist to/from unicode?

How can I switch from non-transaction to transaction mode?

How can I switch from transaction to non-transaction mode?

Why does bogofilter die after printing "Lock table is out of available locks" or "Lock table is out of available object entries"?

Why does bogofilter crash with "File size limit exceeded"?

Why am I getting DB_PAGE_NOTFOUND messages?

Why am I getting "Berkeley DB library configured to support only DB_PRIVATE environments" or "Berkeley DB library configured to support only private environments"?

Can bogofilter be used in a multi-user environment?

Can I share wordlists over NFS?

Why does bogofilter give return codes like 0 and 256 when it's run from inside a program?

Now that I've upgraded why are my scripts broken?

Now that I've upgraded why is bogofilter working less well?

How can I delete all the spam (or non-spam) tokens?

How do I get bogofilter working on Solaris, BSD, etc?

On Solaris

On FreeBSD

On NetBSD and other systems that use "pkgsrc"

On HP-UX

Can I use the make command on my operating system?

How do I build bogofilter as non-root user or for a non-standard installation prefix?

How do I build bogofilter with patches?

How do I make the executables smaller?

datastore_db.c does not compile!

With which mail programs does bogofilter work?

How do I use bogofilter with mutt?

How do I use bogofilter with Sylpheed Claws?

How do I use bogofilter with VM (an Emacs Mail tool)?

How do I use bogofilter with MH-E (the Emacs interface to the MH mail system)?

Why am I getting "Berkeley DB library configured to support only DB_PRIVATE environments" or
"Berkeley DB library configured to support only private environments"?