--- fuzzyocr-2.3b.orig/FuzzyOcr.cf +++ fuzzyocr-2.3b/FuzzyOcr.cf @@ -1,4 +1,8 @@ -loadplugin FuzzyOcr FuzzyOcr.pm +loadplugin Mail::SpamAssassin::Plugin::FuzzyOcr + + +ifplugin Mail::SpamAssassin::Plugin::FuzzyOcr + body FUZZY_OCR eval:fuzzyocr_check() describe FUZZY_OCR Mail contains an image with common spam text inside body FUZZY_OCR_WRONG_CTYPE eval:dummy_check() @@ -16,13 +20,13 @@ # Verbosity level (see manual) Attention: Don't set to 0, but to 0.0 for quiet operation. (Default value: 1) #focr_verbose 1 # -# Logfile (make sure it is writable by the plugin) (Default value: /etc/mail/spamassassin/FuzzyOcr.log) -focr_logfile /etc/mail/spamassassin/FuzzyOcr.log +# Logfile (make sure it is writable by the plugin) (Default value: /var/log/spamassassin/FuzzyOcr.log) +focr_logfile /dev/null ########################## ##### Wordlists ##### -# Here we defined the words to scan for (Default value: /etc/mail/spamassassin/FuzzyOcr.words) -focr_global_wordlist /etc/mail/spamassassin/FuzzyOcr.words +# Here we defined the words to scan for (Default value: /etc/spamassassin/FuzzyOcr.words) +focr_global_wordlist /etc/spamassassin/FuzzyOcr.words # # This is the path RELATIVE to the respektive home directory for the personalized list # This list is merged with the global word list on execution (Default value: .spamassassin/fuzzyocr.words) @@ -106,10 +110,13 @@ # # The score is saved with the hash in the database, so no extra scoring for a db hit is required. 
# -# If the image hash database feature is enabled, specify the file here to use as database (Default value: /etc/mail/spamassassin/FuzzyOcr.hashdb) -#focr_digest_db /etc/mail/spamassassin/FuzzyOcr.hashdb +# If the image hash database feature is enabled, specify the file here to use as database (Default value: /var/lib/spamassassin/FuzzyOcr.hashdb) +#focr_digest_db /var/lib/spamassassin/FuzzyOcr.hashdb # # Automatically add hashes of spam images recognized by OCR to the Image Hash database, to disable, set to 0.0 (Default value: 1) #focr_hashing_learn_scanned 1 # ###################################################################### + + +endif \ No newline at end of file --- fuzzyocr-2.3b.orig/FuzzyOcr.words.sample +++ fuzzyocr-2.3b/FuzzyOcr.words.sample @@ -14,6 +14,7 @@ money::0 million thousand +portfolio buy price::0.2 trade --- fuzzyocr-2.3b.orig/FuzzyOcr.pm +++ fuzzyocr-2.3b/FuzzyOcr.pm @@ -41,7 +41,7 @@ # # written by Christian Holler decoder_at_own-hero_dot_net -package FuzzyOcr; +package Mail::SpamAssassin::Plugin::FuzzyOcr; use strict; use warnings; @@ -104,9 +104,9 @@ our $grep = "/bin/grep"; our $max_images = 5; our $dscore = 10; -our $logfile = "/etc/mail/spamassassin/FuzzyOcr.log"; +our $logfile = "/var/log/spamassassin/FuzzyOcr.log"; our $pwordlist = ".spamassassin/fuzzyocr.words"; -our $digest_db = "/etc/mail/spamassassin/FuzzyOcr.hashdb"; +our $digest_db = "/var/lib/spamassassin/FuzzyOcr.hashdb"; our @scansets = ( '$gocr -i -', '$gocr -l 180 -d 2 -i -' @@ -236,7 +236,7 @@ sub load_global_words { unless ( -r $_[0] ) { - handle_error( $err_msges[3], ( $_[0] ) ); + handle_error( $err_msges[4], ( $_[0] ) ); return; } open WORDLIST, "<$_[0]"; --- fuzzyocr-2.3b.orig/debian/rules +++ fuzzyocr-2.3b/debian/rules @@ -0,0 +1,82 @@ +#!/usr/bin/make -f +# Sample debian/rules that uses debhelper. +# This file is public domain software, originally written by Joey Hess. +# +# This version is for packages that are architecture independent. 
+ +# Uncomment this to turn on verbose mode. +#export DH_VERBOSE=1 + +build: build-stamp +build-stamp: + dh_testdir + + # Add here commands to compile the package. + #$(MAKE) + + touch build-stamp + +clean: + dh_testdir + dh_testroot + rm -f build-stamp + + # Add here commands to clean up after the build process. + #-$(MAKE) clean + #-$(MAKE) distclean + + dh_clean + +install: build + dh_testdir + dh_testroot + dh_clean -k + dh_installdirs + + # Add here commands to install the package into debian/. + #$(MAKE) prefix=`pwd`/debian/`dh_listpackages`/usr install + mkdir --parents `pwd`/debian/fuzzyocr/usr/share/perl5/Mail/SpamAssassin/Plugin + cp FuzzyOcr.pm `pwd`/debian/fuzzyocr/usr/share/perl5/Mail/SpamAssassin/Plugin/ + mkdir --parents `pwd`/debian/fuzzyocr/etc/spamassassin + cp FuzzyOcr.words.sample `pwd`/debian/fuzzyocr/etc/spamassassin/FuzzyOcr.words + #by this trick, when the package is removed but not purged, + # the symlink will disappear and spamassassin will ignore the real conf file + cp FuzzyOcr.cf `pwd`/debian/fuzzyocr/etc/spamassassin/FuzzyOcr.cf.real + ln -s FuzzyOcr.cf.real `pwd`/debian/fuzzyocr/etc/spamassassin/FuzzyOcr.cf + + +# Build architecture-independent files here. +binary-indep: build install + dh_testdir + dh_testroot + dh_installchangelogs + dh_installdocs + dh_installexamples +# dh_installmenu +# dh_installdebconf +# dh_installlogrotate +# dh_installemacsen +# dh_installcatalogs +# dh_installpam +# dh_installmime +# dh_installinit +# dh_installcron +# dh_installinfo +# dh_undocumented + dh_installman + dh_link + dh_compress + dh_fixperms +# dh_perl +# dh_python + dh_installdeb + dh_gencontrol + dh_md5sums + dh_builddeb + +# Build architecture-dependent files here. +binary-arch: build install +# We have nothing to do by default. 
+ +binary: binary-indep binary-arch +.PHONY: build clean binary-indep binary-arch binary install --- fuzzyocr-2.3b.orig/debian/control +++ fuzzyocr-2.3b/debian/control @@ -0,0 +1,22 @@ +Source: fuzzyocr +Section: mail +Priority: extra +Maintainer: A Mennucc1 +Build-Depends: debhelper (>= 5) +Standards-Version: 3.7.2 + +Package: fuzzyocr +Architecture: all +Depends: spamassassin (>= 3), gocr | ocrad, netpbm, libungif-bin (>= 4), imagemagick, libstring-approx-perl, libdigest-md5-perl +Description: spamassassin plugin to check image attachments + This Spamassassin plugin checks for specific keywords in image/gif, + image/jpeg or image/png attachments, using gocr (an optical character + recognition program). + This plugin can be used to detect spam that puts all the real spam + content in an attached image, while the mail itself is only random + text and random html, without any URLs or identifiable information. + Unlike the normal OcrPlugin, it can do approximate matches on + words, so errors in recognition or attempts to obfuscate the text + inside the image will not cause the detection to fail. Another + improvement was to move the wordlist into the configuration file so it + can be easily extended. --- fuzzyocr-2.3b.orig/debian/watch +++ fuzzyocr-2.3b/debian/watch @@ -0,0 +1,23 @@ +# Example watch control file for uscan +# Rename this file to "watch" and then you can run the "uscan" command +# to check for upstream updates and more. 
+# See uscan(1) for format + +# Compulsory line, this is a version 3 file +version=3 + +# Uncomment to examine a Webpage +# +http://fuzzyocr.own-hero.net/wiki/Downloads fuzzyocr-(.*)\.tar\.gz + +# Uncomment to examine a Webserver directory +#http://www.example.com/pub/fuzzyocr-(.*)\.tar\.gz +http://users.own-hero.net/~decoder/fuzzyocr/fuzzyocr-(.*)\.tar\.gz + +# Uncomment to examine an FTP server +#ftp://ftp.example.com/pub/fuzzyocr-(.*)\.tar\.gz debian uupdate + +# Uncomment to find new files on sourceforge, for debscripts >= 2.9 +# http://sf.net/fuzzyocr/fuzzyocr-(.*)\.tar\.gz + + --- fuzzyocr-2.3b.orig/debian/dirs +++ fuzzyocr-2.3b/debian/dirs @@ -0,0 +1,2 @@ +usr/bin +usr/sbin --- fuzzyocr-2.3b.orig/debian/docs +++ fuzzyocr-2.3b/debian/docs @@ -0,0 +1 @@ +FAQ --- fuzzyocr-2.3b.orig/debian/README.Debian +++ fuzzyocr-2.3b/debian/README.Debian @@ -0,0 +1,74 @@ +fuzzyocr for Debian +------------------- + + +--- config file + +The main config file is installed in /etc/spamassassin/FuzzyOcr.cf.real + +When the package is installed, there is +a symlink FuzzyOcr.cf -> FuzzyOcr.cf.real +(so, when the package is removed, but not purged, then the +symlink is absent, and spamassassin does not try to +initialize the plugin). + +--- spamc/spamd + +In the main config file, the settings for focr_logfile and +focr_digest_db do not make sense when a user is using spamc/spamd (as +I do). Currently both are then disabled. 
This way, FuzzyOcr works +out-of-the-box with spamc/spamd + +It is still possible, though, for a user to use those features; +for example, I added to /home/debdev/.spamassassin/user_prefs + focr_verbose 2 + focr_logfile /home/debdev/var/FuzzyOcr.log + focr_enable_image_hashing 1 + focr_digest_db /home/debdev/var/FuzzyOcr.hashdb + + + -- A Mennucc , Sat, 6 Jan 2007 10:00:24 +0100 + +The following is an upstream introduction to FuzzyOcr: + +FuzzyOcr is a plugin for SpamAssassin which is aimed at unsolicited +bulk mail (also known as "Spam") containing images as the main content +carrier. Using different methods, it analyzes the content and +properties of images to distinguish between normal mails (Ham) and +spam mails. The methods mainly are: + + * Optical Character Recognition using different engines and settings + * Fuzzy word matching algorithm applied to OCR results + * Image hashing system to learn unique properties of known spam images + * Dimension, size and integrity checking of images + * Content-Type verification for the containing email + +For a brief description of features, resource aspects and scalability, + see the detailed list below: + + * Matching and learning techniques + o Flexible Optical Character Recognition interface + + Official Support for gocr and ocrad + + Generic support for TesserAct and others upcoming + (planned for 3.5) + o Fuzzy word matching algorithm applied to OCR results + o Recognition of duplicate (already processed) or similar images + using feature vectors (Hashing) + + Efficient MLDBM database + + Mysql Support (planned for 3.5) + o Dimension, size and integrity checking + o Content-Type checking of containing email + + * Resource saving techniques + o Only scan mails which were not recognized yet as Ham or Spam + by other SpamAssassin rules or plugins (using score thresholds) + o Optional skip of other scanning facilities once one scores + already with a given threshold (planned for 3.5) + o Mail skipping based on 
direct feature analysis + (Dimensions and file size) (planned for 3.5) + + * Safety measures + o Configurable timeout against Denial of Service attacks against + the third party tools + o Context based word sets instead of simple lists to prevent + false positives (planned for 3.5) --- fuzzyocr-2.3b.orig/debian/copyright +++ fuzzyocr-2.3b/debian/copyright @@ -0,0 +1,191 @@ +This package was debianized by A Mennucc1 on +Thu, 30 Nov 2006 11:13:32 +0100. + +It was downloaded from http://fuzzyocr.own-hero.net/ +see also http://wiki.apache.org/spamassassin/FuzzyOcrPlugin + +Upstream Author: Christian Holler + +Copyright: Christian Holler and Jorge Valdes + +The Debian packaging is (C) 2006, A Mennucc and +is licensed under the GPL, see `/usr/share/common-licenses/GPL'. + + +fuzzyocr license: + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + --- fuzzyocr-2.3b.orig/debian/changelog +++ fuzzyocr-2.3b/debian/changelog @@ -0,0 +1,21 @@ +fuzzyocr (2.3b-2) unstable; urgency=low + + * Bug fix: "fuzzyocr: Misleading error message", thanks to Adam Porter + (Closes: #404632). 
+ * Bug fix: "fuzzyocr: Perl module path is hardcoded", thanks to Adam + Porter (Closes: #404627); and also + * FuzzyOcr.cf : set focr_global_wordlist to /etc/spamassassin/FuzzyOcr.words + thanks to Michael Holtermann + * added "portfolio" to sample spam words + + -- A Mennucc1 Sat, 6 Jan 2007 10:01:26 +0100 + +fuzzyocr (2.3b-1) unstable; urgency=low + + * Initial release (Closes: #397615) + * Attention: the gocr binary has a bug which can cause segfaults + with specific images. This may turn out to be a security threat. + Use this sw with caution. + + -- A Mennucc1 Thu, 30 Nov 2006 11:13:32 +0100 + --- fuzzyocr-2.3b.orig/debian/fuzzyocr.postinst +++ fuzzyocr-2.3b/debian/fuzzyocr.postinst @@ -0,0 +1,12 @@ +#!/bin/sh +set -e + +if [ -x "/etc/init.d/spamassassin" ]; then + if [ -x "`which invoke-rc.d 2>/dev/null`" ]; then + invoke-rc.d spamassassin restart || exit $? + else + /etc/init.d/spamassassin restart || exit $? + fi +fi + +#DEBHELPER# --- fuzzyocr-2.3b.orig/debian/compat +++ fuzzyocr-2.3b/debian/compat @@ -0,0 +1 @@ +5 --- fuzzyocr-2.3b.orig/debian/fuzzyocr.postrm +++ fuzzyocr-2.3b/debian/fuzzyocr.postrm @@ -0,0 +1,12 @@ +#!/bin/sh +set -e + +if [ -x "/etc/init.d/spamassassin" ]; then + if [ -x "`which invoke-rc.d 2>/dev/null`" ]; then + invoke-rc.d spamassassin restart || exit $? + else + /etc/init.d/spamassassin restart || exit $? + fi +fi + +#DEBHELPER# --- fuzzyocr-2.3b.orig/INSTALL +++ fuzzyocr-2.3b/INSTALL @@ -61,10 +61,10 @@ 2.1. Installing the required files - Put the FuzzyOcr.cf and the FuzzyOcr.pm files into /etc/mail/spamassassin. + Put the FuzzyOcr.cf and the FuzzyOcr.pm files into /etc/spamassassin. The FuzzyOcr.cf file already contains a line to load the plugin, if you want to put the .pm file in a different location, change this line accordingly. - Create a wordlist file, a sample wordlist is shipped with this release, and put it also in /etc/mail/spamassassin. 
+ Create a wordlist file, a sample wordlist is shipped with this release, and put it also in /etc/spamassassin. 2.2 Necessary configuration