--- urlgrabber-2.9.7.orig/.bzr/revision-store/cc/wildfire@progsoc.org-20051228065225-163b81689521a533
+++ urlgrabber-2.9.7/.bzr/revision-store/cc/wildfire@progsoc.org-20051228065225-163b81689521a533
@@ -0,0 +1,4 @@
+
+Inject urlgrabber-2.9.6.tar.gz
+
+
--- urlgrabber-2.9.7.orig/.bzr/revision-store/ba/wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
+++ urlgrabber-2.9.7/.bzr/revision-store/ba/wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
@@ -0,0 +1,7 @@
+
+Inject urlgrabber-2.9.7.tar.gz
+
+
+
+
+
--- urlgrabber-2.9.7.orig/.bzr/revision-store/fd/wildfire@progsoc.org-20051231042934-0a9043755ce47970
+++ urlgrabber-2.9.7/.bzr/revision-store/fd/wildfire@progsoc.org-20051231042934-0a9043755ce47970
@@ -0,0 +1,7 @@
+
+Add in Debian directory
+
+
+
+
+
--- urlgrabber-2.9.7.orig/.bzr/revision-store/c4/wildfire@progsoc.org-20051231043053-cf6e6cd84c74f297
+++ urlgrabber-2.9.7/.bzr/revision-store/c4/wildfire@progsoc.org-20051231043053-cf6e6cd84c74f297
@@ -0,0 +1,8 @@
+
+Bring in new upstream
+
+
+
+
+
+
--- urlgrabber-2.9.7.orig/.bzr/revision-store/0c/wildfire@progsoc.org-20051231043436-2e0cbd0e13fec9b4
+++ urlgrabber-2.9.7/.bzr/revision-store/0c/wildfire@progsoc.org-20051231043436-2e0cbd0e13fec9b4
@@ -0,0 +1,7 @@
+
+New upstream release
+
+
+
+
+
--- urlgrabber-2.9.7.orig/.bzr/revision-store/7b/wildfire@progsoc.org-20051231072726-04b5b3354a60ed0e
+++ urlgrabber-2.9.7/.bzr/revision-store/7b/wildfire@progsoc.org-20051231072726-04b5b3354a60ed0e
@@ -0,0 +1,7 @@
+
+Add Build-Dep on python
+
+
+
+
+
--- urlgrabber-2.9.7.orig/.bzr/weaves/b5/ChangeLog-20051228065045-db7621284620be5b.weave
+++ urlgrabber-2.9.7/.bzr/weaves/b5/ChangeLog-20051228065045-db7621284620be5b.weave
@@ -0,0 +1,1203 @@
+# bzr weave file v5
+i
+1 262cf5a2ec1333b395e3b84df9ea1d1faf6f7d66
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+i 0
+1 e5bdfde6f86c0ef111f745099073880105004325
+n wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
+
+w
+{ 0
+{ 1
+. 2005-10-22 Michael D. Stenner
+.
+. * ChangeLog, TODO, urlgrabber/__init__.py:
+.
+. Updated TODO and preparing for release.
+.
+. 2005-10-22 Michael D. Stenner
+.
+. * test/test_grabber.py, test/test_keepalive.py,
+. test/test_mirror.py, test/support/testdata/test_post.php,
+. urlgrabber/byterange.py, urlgrabber/grabber.py,
+. urlgrabber/keepalive.py, urlgrabber/mirror.py:
+.
+. Added some slick logging support to urlgrabber. It can be
+. controlled by the calling application or enabled from an
+. environment variable.
+.
+. Allowed for HTTP POSTs by passing in the kwarg "data", just as for
+. urllib2.
+.
+. 2005-08-19 Michael D. Stenner
+.
+. * urlgrabber/progress.py:
+.
+. Another minor simplification offered by the WTF folks.
+.
+. 2005-08-18 Michael D. Stenner
+.
+. * urlgrabber/progress.py:
+.
+. Fixed a minor formatting bug, helpfully pointed out by the nice
+. folks at the daily WTF:
+. http://thedailywtf.com/forums/41204/ShowPost.aspx
+.
+. 2005-08-17 Michael D. Stenner
+.
+. * test/test_grabber.py, urlgrabber/grabber.py:
+.
+. Added Menno's idea for catching KeyboardInterrupt. This allows the
+. program to do things like hit the next retry, jump to the next
+. mirror, etc. when the user hits ctrl-c.
+.
+. Also modified the behavior of failure_callback slightly. It now
+. gets called for EVERY failure, even the last one. That is, if
+. there's a failure on the last try, the failure callback gets called
+. and (assuming it doesn't raise anything) the exception gets raised.
+. Previously, it would not be called on the last failure. This is
+. another MINOR compatibility break.
+.
+. 2005-08-17 Michael D. Stenner
+.
+. * test/test_grabber.py, test/test_mirror.py, urlgrabber/grabber.py:
+.
+. except HTTPError, which basically means all HTTP return codes that
+. are not 200 and not handled internally (such as the 401
+. Unauthorized code). Previously, these were caught as IOErrors (of
+. which HTTPError is an indirect subclass) and re-raised as
+. URLGrabErrors with errno 4. They are NOW reraised as URLGrabErrors
+. with errno 14, and with the .code and .exception attributes set to
+. the HTTP status code and HTTPError exception object respectively.
+.
+. This represents a minor compatibility break, as these "errors" are
+. now not handled the same way. They will be raised with a different
+. errno and errno 14 is not in the default list of retrycodes
+. (whereas 4 is).
+.
+. 2005-06-27 Michael D. Stenner
+.
+. * ChangeLog, urlgrabber/byterange.py:
+.
+. Fixed two namespace-related bugs.
+.
+. 2005-05-19 Michael D. Stenner
+.
+. * test/test_keepalive.py, urlgrabber/keepalive.py:
+.
+. started using sys.version_info rather than parsing sys.version
+.
+. 2005-04-04 Michael D. Stenner
+.
+. * urlgrabber/keepalive.py:
+.
+. Fixed a typo in that last change. Always remember to do "make
+. test" before committing :)
+.
+. 2005-04-04 Michael D. Stenner
+.
+. * urlgrabber/keepalive.py:
+.
+. Changed the version checking so SUSE's python "2.3+" version string
+. doesn't cause trouble.
+.
+. 2005-03-14 Michael D. Stenner
+.
+. * test/test_grabber.py, urlgrabber/grabber.py:
+.
+. Added tests to check proxy format. Otherwise, missing proto could
+. lead to very obscure exceptions.
+.
+. 2005-03-08 Michael D. Stenner
+.
+. * ChangeLog:
+.
+. updated ChangeLog
+.
+}
+. 2005-03-08 Michael D. Stenner
+.
+. * urlgrabber/__init__.py:
+.
+. release 2.9.6
+.
+. 2005-03-08 Michael D. Stenner
+.
+. * makefile, setup.py:
+.
+. Changed development status to Beta an fixed a small build bug. It
+. wasn't including MANIFEST and so wasn't building things properly
+. from the distributed tarballs.
+.
+. 2005-03-03 Michael D. Stenner
+.
+. * ChangeLog:
+.
+. updated ChangeLog
+.
+. 2005-03-03 Michael D. Stenner
+.
+. * urlgrabber/__init__.py:
+.
+. release 2.9.5
+.
+. 2005-03-03 Michael D. Stenner
+.
+. * urlgrabber/grabber.py:
+.
+. Fixed a bug caused by the recent proxy fix for python 2.2. This
+. new bug came up when both the FTPHandler and FTPRangeHandler were
+. pushed in.
+.
+. 2005-02-28 Michael D. Stenner
+.
+. * ChangeLog:
+.
+. updated ChangeLog
+.
+. 2005-02-28 Michael D. Stenner
+.
+. * urlgrabber/__init__.py:
+.
+. release 2.9.4
+.
+. 2005-02-28 Michael D. Stenner
+.
+. * urlgrabber/grabber.py:
+.
+. Added http_headers and ftp_headers options.
+.
+. 2005-02-25 Michael D. Stenner
+.
+. * TODO, test/base_test_code.py, test/test_grabber.py,
+. test/support/squid/squid-setup, test/support/squid/squid.conf,
+. test/support/testdata/README, test/support/testdata/reference,
+. test/support/testdata/short_reference,
+. test/support/testdata/mirror/broken/broken.txt,
+. test/support/testdata/mirror/broken/reference,
+. test/support/testdata/mirror/broken/short_reference,
+. test/support/testdata/mirror/broken/test1.txt,
+. test/support/testdata/mirror/broken/test2.txt,
+. test/support/testdata/mirror/m1/broken.txt,
+. test/support/testdata/mirror/m1/reference,
+. test/support/testdata/mirror/m1/short_reference,
+. test/support/testdata/mirror/m1/test1.txt,
+. test/support/testdata/mirror/m1/test2.txt,
+. test/support/testdata/mirror/m2/broken.txt,
+. test/support/testdata/mirror/m2/reference,
+. test/support/testdata/mirror/m2/short_reference,
+. test/support/testdata/mirror/m2/test1.txt,
+. test/support/testdata/mirror/m2/test2.txt,
+. test/support/testdata/mirror/m3/broken.txt,
+. test/support/testdata/mirror/m3/reference,
+. test/support/testdata/mirror/m3/short_reference,
+. test/support/testdata/mirror/m3/test1.txt,
+. test/support/testdata/mirror/m3/test2.txt,
+. test/support/vsftpd/ftp-server-setup,
+. test/support/vsftpd/vsftpd.conf,
+. test/support/vsftpd/vsftpd.ftpusers,
+. test/support/vsftpd/vsftpd.user_list, urlgrabber/grabber.py:
+.
+. Added data and instructions for setting up ftp and proxy servers
+. for testing and also added proxy tests. Fixed two bugs related to
+. proxies. One bug made it so the first time proxy data was passed
+. in, it was not actually used. The other prevented proxies from
+. working for ftp or non-keepalive http using python 2.2. The latter
+. is a bug in urllib2, but is now worked around (somewhat hackishly,
+. I admit) in urlgrabber. LocalWords: CVS
+.
+. 2005-02-22 Michael D. Stenner
+.
+. * ChangeLog:
+.
+. updated ChangeLog
+.
+. 2005-02-22 Michael D. Stenner
+.
+. * urlgrabber/__init__.py:
+.
+. release 2.9.3
+.
+. 2005-02-21 Michael D. Stenner
+.
+. * README:
+.
+. updated README with instructions for building rpms and notes about
+. python version compatibility.
+.
+. 2005-02-21 Michael D. Stenner
+.
+. * urlgrabber/__init__.py:
+.
+. updated project url
+.
+. 2005-02-21 Michael D. Stenner
+.
+. * urlgrabber/grabber.py:
+.
+. Fixed minor exception bug.
+.
+. 2005-02-14 Michael D. Stenner
+.
+. * ChangeLog:
+.
+. updated ChangeLog
+.
+. 2005-02-14 Michael D. Stenner
+.
+. * urlgrabber/__init__.py:
+.
+. release 2.9.2
+.
+. 2005-02-14 Michael D. Stenner
+.
+. * urlgrabber/grabber.py:
+.
+. Added http character encoding patch from Chris Lumens.
+.
+. 2005-02-14 Michael D. Stenner
+.
+. * test/test_grabber.py:
+.
+. slight cleanup - no new code
+.
+. 2005-02-14 Michael D. Stenner
+.
+. * ChangeLog:
+.
+. updated ChangeLog
+.
+. 2005-02-14 Michael D. Stenner
+.
+. * urlgrabber/__init__.py:
+.
+. release 2.9.1
+.
+. 2005-02-14 Michael D. Stenner
+.
+. * test/test_keepalive.py, test/test_mirror.py,
+. urlgrabber/byterange.py, urlgrabber/grabber.py,
+. urlgrabber/keepalive.py:
+.
+. Fixed python 2.4 bug - added .code attribute to returned file
+. objects. Changed keepalive.HANDLE_ERRORS behavior for the way 2.4
+. does things.
+.
+. 2005-02-08 Ryan Tomayko
+.
+. * makefile:
+.
+. Added Python 2.4 to lists of pythons to test..
+.
+. 2005-02-04 Ryan Tomayko
+.
+. * urlgrabber/byterange.py:
+.
+. Fixed RH bug #147124. range_tuple_header was not accounting for a
+. possible None value.
+. https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=147124
+.
+. 2005-01-14 Ryan Tomayko
+.
+. * test/test_grabber.py:
+.
+. Fixed proftpd test (was raising error when it shouldn't have been)
+.
+. 2005-01-14 Ryan Tomayko
+.
+. * urlgrabber/: grabber.py, progress.py:
+.
+. Applied urlgrabber-add-text.patch from Terje Rosten
+.
+. 2004-12-12 Ryan Tomayko
+.
+. * test/base_test_code.py, test/test_grabber.py,
+. urlgrabber/byterange.py:
+.
+. Fix for RH bug #140387
+. https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=140387
+.
+. 2004-10-17 Ryan Tomayko
+.
+. * urlgrabber/grabber.py:
+.
+. Added except for socket resource error when reading from urllib2
+. file object.
+.
+. 2004-10-16 Ryan Tomayko
+.
+. * urlgrabber/grabber.py:
+.
+. Fixed AttributeError problem that was pretty common after additions
+. for timeout support.
+.
+. 2004-10-08 Ryan Tomayko
+.
+. * TODO, urlgrabber/grabber.py:
+.
+. Added timeout support.
+.
+. 2004-09-07 Michael D. Stenner
+.
+. * TODO, scripts/urlgrabber, urlgrabber/__init__.py,
+. urlgrabber/grabber.py, urlgrabber/mirror.py,
+. urlgrabber/progress.py:
+.
+. Added Ryan's progress changes in. That still has some quirks I
+. need to iron out.
+.
+. Made all urlgrabber imports relative to work more nicely with yum's
+. package layout. This was straightforward except for the
+. __version__ import, which I solved in a slightly icky way.
+.
+. 2004-08-25 Michael D. Stenner
+.
+. * MANIFEST.in:
+.
+. Added "MANIFEST" to MANIFEST.in. Without this, one couldn't
+. correctly rebuild an rpm (for example) from the distributed
+. tarball.
+.
+. 2004-08-24 Ryan Tomayko
+.
+. * test/threading/batchgrabber.py:
+.
+. Updates for Michael's multifile progress bar changes.
+.
+. 2004-08-21 Michael D. Stenner
+.
+. * makefile, urlgrabber/__init__.py:
+.
+. Fixed a typo in the new 'release' target. Reformatted the doc in
+. __init__.py to be under 80 columns in PKG-INFO.
+.
+. 2004-08-21 Michael D. Stenner
+.
+. * ChangeLog:
+.
+. updated ChangeLog
+.
+. 2004-08-21 Michael D. Stenner
+.
+. * urlgrabber/__init__.py:
+.
+. release 2.9.0
+.
+. 2004-08-21 Michael D. Stenner
+.
+. * makefile:
+.
+. Added "release" and "daily" targets to makefile for automated
+. releases.
+.
+. 2004-08-20 Michael D. Stenner
+.
+. * test/: test_grabber.py, test_mirror.py:
+.
+. Updated callback tests to accomodate the new callback semantics.
+. Also added many new checkfunc tests to for URLGrabber.
+.
+. 2004-08-20 Michael D. Stenner
+.
+. * urlgrabber/: grabber.py, mirror.py:
+.
+. Changed the callback syntax. This affects failure_callback use in
+. both URLGrabber and MirrorGroup instances. It also affects
+. checkfunc use in URLGrabber instances. If you use these features,
+. you WILL need to change your code.
+.
+. 2004-08-12 Michael D. Stenner
+.
+. * test/test_mirror.py:
+.
+. enhanced the failover test to ensure that the bad mirror is tried
+. and fails
+.
+. 2004-08-12 Michael D. Stenner
+.
+. * test/test_grabber.py, urlgrabber/grabber.py:
+.
+. fixed typo in _make_callback and added testcase
+.
+. 2004-08-11 Michael D. Stenner
+.
+. * makefile, test/test_grabber.py, urlgrabber/grabber.py:
+.
+. Fixed a bug in URLGrabber._retry that prevented failure callbacks
+. from working. Also added a testcase for failure callbacks.
+.
+. 2004-08-09 Michael D. Stenner
+.
+. * urlgrabber/mirror.py:
+.
+. fixed documentation typos
+.
+. 2004-07-22 Michael D. Stenner
+.
+. * TODO, urlgrabber/grabber.py, urlgrabber/progress.py:
+.
+. Committing new progress meter code. This new version includes an
+. emulation of the old meter with pretty good backward compatibility.
+. However, if people are using custom progress meters, they might
+. break. This is a good general warning for the next few versions of
+. urlgrabber, actually. As the threading stuff evolves, the progress
+. meter may have to change its behavior a bit.
+.
+. 2004-07-21 Ryan Tomayko
+.
+. * .cvsignore:
+.
+. Ignoring kdevelop, kate, and anjuta project files. Ignoring
+. directories created by distutils: build, dist
+.
+. 2004-07-21 Ryan Tomayko
+.
+. * TODO:
+.
+. opener related items for ALPHA are completed. Moved stuff related
+. to getting a release out up into ALPHA section.
+.
+. 2004-07-21 Ryan Tomayko
+.
+. * setup.py, urlgrabber/__init__.py:
+.
+. Added distutils trove classifiers to setup.py for eventual
+. inclusion in PyPI. Added general description string for urlgrabber
+. package (__init__.py).
+.
+. 2004-07-21 Ryan Tomayko
+.
+. * urlgrabber/grabber.py:
+.
+. Fixed bug with urllib2.OpenerDirector caching. Added cache_openers
+. option (pass to any urlXXX function/method) to control
+. OpenerDirector caching. The default is to cache openers.
+.
+. 2004-04-02 Michael D. Stenner
+.
+. * makefile, test/test_grabber.py:
+.
+. Tidied up the grabber tests so we're not always skipping those ftp
+. reget tests (the just don't happen now). Also, modified the
+. makefile so that "make test" runs the testsuite with -v 1 for BOTH
+. python2.2 and 2.3 if they are available.
+.
+. 2004-03-31 Michael D. Stenner
+.
+. * LICENSE, maint/license-notice, scripts/urlgrabber,
+. test/grabberperf.py, test/runtests.py, test/test_byterange.py,
+. test/test_grabber.py, test/test_keepalive.py, test/test_mirror.py,
+. test/threading/batchgrabber.py, urlgrabber/byterange.py,
+. urlgrabber/grabber.py, urlgrabber/keepalive.py,
+. urlgrabber/mirror.py, urlgrabber/progress.py:
+.
+. Replace LICENSE with the LGPL. Added maint/license-notice, which
+. contains the corresponding license-snippet that should be included
+. at the top of each file. Finally, added this snippet to all of the
+. .py files. urlgrabber is now fully LGPL-ed.
+.
+. 2004-03-31 Michael D. Stenner
+.
+. * urlgrabber/grabber.py:
+.
+. fixed minor typo in opener option docs
+.
+. 2004-03-31 Ryan Tomayko
+.
+. * test/runtests.py:
+.
+. Added support for specifying verbosity of unit tests when using
+. runtests.py. 'python runtests.py --help' for usage.
+.
+. 2004-03-31 Ryan Tomayko
+.
+. * test/test_grabber.py, urlgrabber/grabber.py:
+.
+. Added opener kwarg/option to allow overriding of
+. urllib2.OpenerDirector.
+.
+. 2004-03-31 Michael D. Stenner
+.
+. * test/munittest.py, test/test_keepalive.py,
+. urlgrabber/keepalive.py:
+.
+. Fixed a few bugs related to python 2.3.
+.
+. 2004-03-29 Michael D. Stenner
+.
+. * TODO:
+.
+. updated TODO
+.
+. 2004-03-29 Michael D. Stenner
+.
+. * ChangeLog, makefile:
+.
+. Modified "make ChangeLog" target so that it doesn't print the
+. times. If we use local times, then it varies with who rebuilds the
+. ChangeLog file. Also updated the ChangeLog to the new format.
+.
+. 2004-03-28 Michael D. Stenner
+.
+. * makefile:
+.
+. Added "make test" target.
+.
+. 2004-03-28 Michael D. Stenner
+.
+. * urlgrabber/grabber.py:
+.
+. Fixed documentation error in the range docs. Temporarily disabled
+. the opener-caching because it seems to not quite work correctly
+. yet.
+.
+. 2004-03-28 Michael D. Stenner
+.
+. * test/test_keepalive.py:
+.
+. Changed the wait time for the dropped-connection test. The apache
+. keepalive timeout is 15 seconds, so I set the test to 20.
+.
+. 2004-03-28 Ryan Tomayko
+.
+. * TODO:
+.
+. Removed items from TODO list related to options handling as well as
+. the item on reget not working due to how options are copied.
+.
+. 2004-03-28 Ryan Tomayko
+.
+. * test/test_grabber.py, urlgrabber/grabber.py:
+.
+. Modified URLGrabberOptions to use "instance-inheritance" pattern
+. instead of copying all options.
+.
+. 2004-03-28 Ryan Tomayko
+.
+. * urlgrabber/mirror.py:
+.
+. Fixed real small typo in mirror doc string.
+.
+. 2004-03-28 Michael D. Stenner
+.
+. * urlgrabber/grabber.py:
+.
+. Tidied up some of the callback code.
+.
+. 2004-03-21 Michael D. Stenner
+.
+. * ChangeLog, TODO, test/threading/batchgrabber.py,
+. test/threading/urls-keepalive, test/threading/urls-many:
+.
+. Updated ChangeLog, TODO. Played with the batchgrabber stuff a
+. little.
+.
+. 2004-03-21 Michael D. Stenner
+.
+. * test/test_keepalive.py:
+.
+. added a simple threading test
+.
+. 2004-03-21 Michael D. Stenner
+.
+. * urlgrabber/keepalive.py:
+.
+. Rearranged keepalive a little to make it nicer. Also made it deal
+. better with dead connections. Now it checks ALL "free"
+. connections.
+.
+. 2004-03-20 Michael D. Stenner
+.
+. * TODO, urlgrabber/keepalive.py:
+.
+. OK, I THINK I've made keepalive thread-friendly. The interface is
+. completely unchanged. It passes all the existing unittests
+. (although none of them test threading issues). I need to make some
+. new tests for thread stuff.
+.
+. 2004-03-19 Michael D. Stenner
+.
+. * test/grabberperf.py:
+.
+. Made tests a little cleaner and added "none" test, which is a
+. reasonable "ideal" test case.
+.
+. 2004-03-19 Ryan Tomayko
+.
+. * TODO:
+.
+. Removed item on reducing urllib2 opener director creation.
+.
+. 2004-03-19 Ryan Tomayko
+.
+. * test/grabberperf.py:
+.
+. Added proxies to performance tests to ensure that ProxyHandlers are
+. being added to the urllib2 OpenerDirector.
+.
+. 2004-03-19 Ryan Tomayko
+.
+. * urlgrabber/grabber.py:
+.
+. Caching urllib2 ProxyHandler and OpenerDirectors instead of
+. creating new ones for each request.
+.
+. 2004-03-19 Michael D. Stenner
+.
+. * TODO, urlgrabber/grabber.py:
+.
+. In grabber.py, fixed a bug with urlread when no limit is specified.
+. Made the "natural" file translation work for relative pythnames.
+.
+. 2004-03-19 Michael D. Stenner
+.
+. * test/: base_test_code.py, test_grabber.py:
+.
+. Added file and ftp tests for reget.
+.
+. 2004-03-18 Michael D. Stenner
+.
+. * test/: base_test_code.py, munittest.py, runtests.py,
+. test_byterange.py, test_grabber.py, test_keepalive.py,
+. test_mirror.py:
+.
+. By popular demand, I broke out the unittest coolness into a
+. separate module (munittest.py) which is largely a drop-in
+. replacement for unittest.py. The whole test setup is just way cool
+. at this point.
+.
+. 2004-03-18 Michael D. Stenner
+.
+. * test/: base_test_code.py, runtests.py, test_byterange.py,
+. test_grabber.py, test_keepalive.py, test_mirror.py:
+.
+. Made tests prettier (again... I do like pretty tests) and created
+. the test result "skip" intended for things like ftp tests where
+. there may not be a server available. None of the available "ok",
+. "FAIL" or "ERROR" were really appropriate.
+.
+. 2004-03-17 Michael D. Stenner
+.
+. * urlgrabber/mirror.py:
+.
+. Added a little more documentation.
+.
+. 2004-03-17 Michael D. Stenner
+.
+. * TODO, test/test_mirror.py, urlgrabber/mirror.py:
+.
+. Made the control of MirrorGroup failover action WAY cooler and more
+. flexible. Added tests
+.
+. 2004-03-16 Michael D. Stenner
+.
+. * TODO, test/base_test_code.py, test/test_grabber.py,
+. urlgrabber/grabber.py:
+.
+. Changed test base_url according to new web page. Added prefix
+. argument to URLGrabber, along with docs and test. Also documented
+. URLGrabber failure_callback.
+.
+. 2004-03-15 Michael D. Stenner
+.
+. * TODO:
+.
+. updated TODO after alpha-huddle
+.
+. 2004-03-15 Ryan Tomayko
+.
+. * test/test_grabber.py:
+.
+. Grabber tests now call package level urlgrab, urlopen, urlread
+. functions.
+.
+. 2004-03-15 Ryan Tomayko
+.
+. * urlgrabber/__init__.py:
+.
+. Added urlgrab, urlopen, urlread as package level exports.
+.
+. 2004-03-14 Michael D. Stenner
+.
+. * ChangeLog, scripts/urlgrabber, urlgrabber/byterange.py,
+. urlgrabber/grabber.py, urlgrabber/keepalive.py,
+. urlgrabber/mirror.py:
+.
+. Added Id: strings to distributed python files, made grabber.py use
+. the global version information from __init__.py, and updated
+. copyright statements to include 2004 :)
+.
+. 2004-03-14 Michael D. Stenner
+.
+. * TODO:
+.
+. updated TODO
+.
+. 2004-03-14 Michael D. Stenner
+.
+. * setup.py, urlgrabber/__init__.py:
+.
+. Put version, date, url, and author information into __init__.py and
+. have setup.py read them in from there. Also put Id: tag into
+. __init__.py, which we should probably do in the other files as
+. well.
+.
+. 2004-03-13 Michael D. Stenner
+.
+. * test/test_mirror.py, urlgrabber/mirror.py:
+.
+. improved mirror.py docs, added MGRandomStart (which is equivalent
+. to yum's roundrobin failover policy) and MGRandomOrder. Added
+. associated tests for these new classes.
+.
+. 2004-03-13 Michael D. Stenner
+.
+. * test/test_byterange.py:
+.
+. converted test_byterange.py to use base_test_code
+.
+. 2004-03-13 Michael D. Stenner
+.
+. * TODO:
+.
+. updated TODO
+.
+. 2004-03-13 Michael D. Stenner
+.
+. * test/: base_test_code.py, test_grabber.py, test_keepalive.py:
+.
+. making tests a little nicer
+.
+. 2004-03-13 Michael D. Stenner
+.
+. * test/: base_test_code.py, test_grabber.py, test_mirror.py:
+.
+. added callback test to test_mirror.py. Restructuring the test code
+. a little. Starting to move common definitions (like the reference
+. data) into base_test_code.py, which can be accessed from the other
+. test modules. Also defined UGTestCase which prints things slightly
+. more prettily.
+.
+. Also, I'd like to go on record as saying that I absolutely hate the
+. __attribute munging in python, and I don't think it should ever be
+. used. Thank you for your attention.
+.
+. 2004-03-13 Michael D. Stenner
+.
+. * urlgrabber/: grabber.py, mirror.py:
+.
+. added "failure_callback" options to both MirrorGroup and
+. URLGrabber. These allow the calling application to know when a
+. retry/failover is occurring and what caused it (the exception is
+. passed to the callback).
+.
+. 2004-03-13 Michael D. Stenner
+.
+. * test/: runtests.py, test_grabber.py, test_keepalive.py:
+.
+. added keepalive tests; fixed test_bad_reget_type
+.
+. 2004-03-12 Michael D. Stenner
+.
+. * urlgrabber/grabber.py:
+.
+. Fixed a stupid typo in the splituser fix (from 5 minutes ago)
+.
+. 2004-03-12 Michael D. Stenner
+.
+. * urlgrabber/.cvsignore:
+.
+. Added .cvsignore to urlgrabber/ directory
+.
+. 2004-03-12 Michael D. Stenner
+.
+. * TODO:
+.
+. Updated TODO.
+.
+. 2004-03-12 Michael D. Stenner
+.
+. * urlgrabber/: grabber.py, mirror.py:
+.
+. Fixed a typo in mirror.py. Added reget docs and reformatted
+. module-level docs in grabber.py. Also fixed a "bug" in grabber.py
+. that came about because the python 2.3 urllib2 no longer includes
+. splituser and splitpasswd.
+.
+. 2004-03-12 Ryan Tomayko
+.
+. * test/test_grabber.py:
+.
+. Using a tempfile for urlgrab test instead of named file in cwd.
+.
+. 2004-03-12 Ryan Tomayko
+.
+. * urlgrabber/grabber.py:
+.
+. Removed ugly code that was suppose to allow a file-object to be
+. passed to urlgrab as the output file.
+.
+. 2004-03-12 Ryan Tomayko
+.
+. * test/runtests.py:
+.
+. Added mirror tests to main test suite.
+.
+. 2004-03-12 Michael D. Stenner
+.
+. * test/test_grabber.py, urlgrabber/grabber.py:
+.
+. Added first attempt at reget support. Created test suite for http.
+.
+. 2004-03-11 Michael D. Stenner
+.
+. * ChangeLog, MANIFEST.in, README, makefile, setup.py,
+. maint/cvs2cl.pl, maint/usermap:
+.
+. Updated MANIFEST.in and README. Created 'maint' directory
+. containing cvs2cl.pl and usermap file (to make usernames to email
+. address for cvs2cl.pl). Also created a "ChangeLog" target in the
+. makefile to build the Changelog.
+.
+. 2004-03-11 Michael D. Stenner
+.
+. * TODO, urlgrabber/keepalive.py:
+.
+. [TODO] added not about restructuring import style
+.
+. [keepalive.py] improved exception-handling and made it a bit more
+. fault-tolerant. Specifically, it now better addresses
+. dropped-connections. Also added a dropped-connection test to the
+. test code in keepalive.py. For now, I'd like to keep this code in
+. keepalive.py for convenient diagnostics from users.
+.
+. 2004-03-10 Michael D. Stenner
+.
+. * TODO, test/test_mirror.py, urlgrabber/grabber.py,
+. urlgrabber/keepalive.py, urlgrabber/mirror.py:
+.
+. Added mirror code and associated test code.
+.
+. [grabber.py] Edited URLGrabError doc string to reflect MirrorGroup
+. error code and new error code policy.
+.
+. [TODO] Moved reget to ALPHA (from ALPHA 2)
+.
+. [keepalive.py] fixed problem with the new python 2.3 httplib. They
+. now raise BadStatusLine from a new place.
+.
+. 2004-03-10 Ryan Tomayko
+.
+. * test/: grabberperf.py, test_byterange.py, test_grabber.py:
+.
+. Fixed up tests to work with new directory layout. Note: most
+. sys.path hackery has been removed so you will need to setup
+. PYTHONPATH manually before running test scripts.
+.
+. 2004-03-10 Ryan Tomayko
+.
+. * setup.py:
+.
+. Fixed up for new cvs structure. Tried to simplify a bit while I was
+. here.
+.
+. 2004-03-10 Michael D. Stenner
+.
+. * ChangeLog:
+.
+. Edited the cvs repo to restructure a little bit. All module python
+. code (grabber.py, byterange.py, progress.py, keepalive.py, and
+. __init__.py) have been moved into a "urlgrabber" directory.
+.
+. Removed the ChangeLog file from the repo and created a new empty
+. one. In the intial (empty) version, I included the old logs from
+. before the repo-merge. Therefore, those logs appear as a single
+. log-entry. It's a little bit of a hack, but it's good enough. All
+. the info is there, and it allows us to move forward without having
+. to keep special-casing those old logs.
+.
+. All future changelogs will be generated with cvs2cs.pl.
+.
+. Other than these actions, I didn't do anything to any files.
+. Therefore, a number of things are certainly broken, as they expect
+. things to be in other places. Things I can think of: test code,
+. setup.py.
+.
+. 2004-03-08 Ryan Tomayko
+.
+. * TODO:
+.
+. raise exceptions for invalid ranges DONE.
+.
+. 2004-03-08 Ryan Tomayko
+.
+. * urlgrabber/grabber.py:
+.
+. Added support for RangeError to URLGrabError handling (errno 9).
+. Also, default is _not_ to retry grabs resulting in RangeError.
+.
+. 2004-03-08 Ryan Tomayko
+.
+. * urlgrabber/byterange.py:
+.
+. Fixed try/except without an explicit exception. that's a no-no.
+.
+. 2004-03-08 Ryan Tomayko
+.
+. * urlgrabber/byterange.py:
+.
+. raising RangeError anytime a range is non-satisfiable.
+.
+. 2004-03-08 Ryan Tomayko
+.
+. * urlgrabber/grabber.py:
+.
+. Always copy_local when a byte range is specified to urlgrab.
+.
+. 2004-03-08 Ryan Tomayko
+.
+. * TODO:
+.
+. urllib2 conditionals were cleaned up a while ago..
+.
+. 2004-03-08 Ryan Tomayko
+.
+. * TODO:
+.
+. Confirmed FTP connections are being closed properly when using
+. ranges. Added item for using CacheFTPHandler so FTP connections
+. are reused. Moved reget item under ALPHA 2. Added items for
+. keepalive/progress_meter w/ multiple threads. viewcvs DONE. Test
+. under multiple threads DONE. Basic performance tests DONE.
+.
+. 2004-03-06 Ryan Tomayko
+.
+. * test/threading/: urls-keepalive, urls-many:
+.
+. Some URLs for batchgrabber.py.
+.
+. 2004-03-06 Ryan Tomayko
+.
+. * test/threading/batchgrabber.py:
+.
+. Module for testing urlgrabber w/ multiple threads.
+.
+. 2004-03-01 Ryan Tomayko
+.
+. * urlgrabber/grabber.py:
+.
+. Performance tweak to bypass URLGrabberFileObject.read when not
+. using progress_obj or throttle.
+.
+. 2004-03-01 Ryan Tomayko
+.
+. * urlgrabber/grabber.py:
+.
+. Removed speed test from grabber.py. This test has been moved to
+. test/grabberperf.py
+.
+. 2004-03-01 Ryan Tomayko
+.
+. * test/grabberperf.py:
+.
+. First cut at a module to run through some performance tests for
+. grabber.py
+.
+. 2004-03-01 Ryan Tomayko
+.
+. * test/test_grabber.py:
+.
+. Clean up temp files after testing urlgrab.
+.
+. 2004-02-28 Ryan Tomayko
+.
+. * TODO:
+.
+. Added reget and batch stuff under the 'maybe' section.
+.
+. 2004-02-22 Ryan Tomayko
+.
+. * urlgrabber/grabber.py:
+.
+. Brought in fix from yum for user:pass escaping in URLs.
+.
+. 2004-02-22 Ryan Tomayko
+.
+. * TODO, urlgrabber/grabber.py:
+.
+. Added checkfunc support to urlread.
+.
+. 2004-02-22 Ryan Tomayko
+.
+. * TODO, urlgrabber/byterange.py:
+.
+. Changed all variables/args named 'range' to rangetup or brange to
+. avoid possible python keyword clash.
+.
+. 2004-02-22 Ryan Tomayko
+.
+. * TODO:
+.
+. Added item about handling KeyboardInterupt exceptions.
+.
+. 2004-02-22 Ryan Tomayko
+.
+. * TODO:
+.
+. Restructured TODO list based on recent email from Michael.
+.
+. 2004-02-14 Ryan Tomayko
+.
+. * .cvsignore, MANIFEST.in, README, TODO, progress_meter.py,
+. setup.py, urlgrabber.py, scripts/urlgrabber, test/.cvsignore,
+. test/runtests.py, test/test_byterange.py, test/test_grabber.py,
+. test/test_urlgrabber.py, urlgrabber/__init__.py,
+. urlgrabber/byterange.py, urlgrabber/grabber.py,
+. urlgrabber/progress.py:
+.
+. Mass commit/merge of changes made for urlgrabber restructuring..
+. See ChangeLog for details.
+.
+. 2004-02-11 Michael D. Stenner
+.
+. * ChangeLog:
+.
+. These are the changes from when ryan was working in a separate
+. repo. They are included here in bulk for ChangeLog simplicity.
+.
+. 2004-02-09 Friday rtomayko
+.
+. * TODO: Initial commit of TODO list..
+.
+. 2004-01-29 Thursday rtomayko
+.
+. * grabber.py, grabbertests.py, urlgrabber: Added proxy support.
+.
+. * grabber.py: Removed default option copying in URLGrabber class
+. because this could cause side-effects.
+.
+. 2004-01-25 Sunday rtomayko
+.
+. * setup.py: Fixed bad syntax on doc files..
+.
+. * setup.py, urlgrabber: Added urlgrabber script to distutils
+. install.
+.
+. 2003-12-16 Tuesday rtomayko
+.
+. * byterange.py, grabber.py: Initial crack at range support for
+. FTP - working but ugly..
+.
+. 2003-12-12 Friday rtomayko
+.
+. * byterange.py, byterangetests.py: Make range tuples work like
+. python slices. endpos is non-inclusive.
+.
+. 2003-12-10 Wednesday rtomayko
+.
+. * scripts/urlgrabber: Quick script to call urlgrabber from the
+. command line. I've been using it in place of wget to get as
+. much testing as possible.
+.
+. * grabber.py: Fixed small problem with progress meter not
+. updating properly. This was caused by moving _do_grab to
+. URLGrabberFileObject.
+.
+. * grabber.py: Moved _do_open and per-request variables into
+. URLGrabberFileObject
+.
+. * grabber.py: Added doc for user_agent kwarg
+.
+. * grabber.py: Moved urllib2 Handler configuration to
+. URLGrabberFileObject. Each request has a
+. urllib2.OpenerDirector now that is customized based on
+. options.
+.
+. * grabber.py: Added keepalive kwarg. keepalive can be
+. specified on a per-request basis now.
+.
+. * grabber.py: Changed method of handling retries. I'm not
+. sure if I like this yet. All URLGrabber.urlXXX methods
+. define a function and pass it to the URLGrabber._retry()
+. method which calls the function provided and handles
+. retrying if the provided function raises a URLGrabError.
+.
+. 2003-12-09 Tuesday rtomayko
+.
+. * grabber.py: default_grabber is now a default instance of
+. URLGrabber used by the module level urlXXX functions.
+.
+. * grabber.py: deprecated module level set_ methods.
+. default_grabber should be used instead.
+.
+. * grabber.py: URLGrabber should be completely stateless now.
+. Using a single URLGrabber object for multiple requests
+. shouldn't be a problem.
+.
+. * grabber.py: Added URLGrabberOptions class to handle kwargs
+. more elegantly.
+.
+. 2003-12-08 Monday rtomayko
+.
+. * __init__.py, setup.py: everything should be in a package named
+. urlgrabber now (distutils is a nice piece of software).
+.
+. * grabber.py, range.py, rangetests.py, grabbertests.py: - Initial
+. attempt at range support for HTTP and local files through range
+. module.
+.
+. * byterange.py: brought in skeleton classes from urllib2 for FTP
+. range support but FTP ranges are not yet supported.
+.
+. * byterange.py: Local file range support internally implemented
+. with RangeableFileObject which is a pretty cool file object
+. wrapper that automatically handles ranging.
+.
+. 2003-12-07 Sunday rtomayko
+.
+. * grabber.py: minor changes for line wrapping
+.
+. * grabber.py: added URLGrabber class (moved _do_open, _do_grab,
+. _parse_url, _get_raw_throttle into said class).
+.
+. * grabber.py: deprecated retrygrab. now supported through
+. urlgrab(retry=n).
+.
+. * grabber.py: local filenames (i.e. without URL scheme
+. specifier) are now handled wherever a url may be passed.
+.
+. * grabber.py: retry is now fully supported on urlread and
+. partially supported on urlopen. wrt to urlopen, retries are
+. possible if an URLGrabError occurs while _opening_ the url
+. (e.g. due to temporarily unavailable, etc). however, we
+. still can't protect anyone from problems that occur after
+. urlopen has returned the file object.
+.
+. 2003-12-05 Friday rtomayko
+.
+. * grabber.py: Initial attempt at retooling the interface to use
+. kwargs.. *Most* functionality should still be working..
+.
+. 2003-10-13 Michael D. Stenner
+.
+. * urlgrabber.py, test/test_urlgrabber.py:
+.
+. created test_urlgrabber.py and started moving tests into it
+.
+. 2003-10-12 Michael D. Stenner
+.
+. * README, makefile, progress_meter.py, urlgrabber.py,
+. urlgrabber/keepalive.py, LICENSE, MANIFEST.in, setup.py:
+.
+. Initial revision
+.
+. 2003-10-12 Michael D. Stenner
+.
+. * README, makefile, progress_meter.py, urlgrabber.py,
+. urlgrabber/keepalive.py, LICENSE, MANIFEST.in, setup.py:
+.
+. importing urlgrabber
+.
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/df/copyright-20051231042924-b0be1c06ec5d87bc.weave
+++ urlgrabber-2.9.7/.bzr/weaves/df/copyright-20051231042924-b0be1c06ec5d87bc.weave
@@ -0,0 +1,22 @@
+# bzr weave file v5
+i
+1 3fe1cd94c98f8111fee75b9b7fe29f60cbf8052c
+n wildfire@progsoc.org-20051231042934-0a9043755ce47970
+
+w
+{ 0
+. This package was debianized by Anand Kumria on
+. Sun, 9 Oct 2005 13:06:55 +1000.
+.
+. It was downloaded from
+.
+. Copyright Holder:
+. Michael D. Stenner
+. Ryan Tomayko
+.
+. License:
+.
+. python-urlgrabber is released under the terms of the GNU Lesser General
+. Licence (LGPL) a copy of which may be found in /usr/share/common-licenses/LGPL
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/df/LICENSE-20051228065045-da1ba0b849f84ac3.weave
+++ urlgrabber-2.9.7/.bzr/weaves/df/LICENSE-20051228065045-da1ba0b849f84ac3.weave
@@ -0,0 +1,467 @@
+# bzr weave file v5
+i
+1 39a21f33cadea18adcc23bf808d7d5ea6419c8b1
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+{ 0
+. GNU LESSER GENERAL PUBLIC LICENSE
+. Version 2.1, February 1999
+.
+. Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+. Everyone is permitted to copy and distribute verbatim copies
+. of this license document, but changing it is not allowed.
+.
+. [This is the first released version of the Lesser GPL. It also counts
+. as the successor of the GNU Library Public License, version 2, hence
+. the version number 2.1.]
+.
+. Preamble
+.
+. The licenses for most software are designed to take away your
+. freedom to share and change it. By contrast, the GNU General Public
+. Licenses are intended to guarantee your freedom to share and change
+. free software--to make sure the software is free for all its users.
+.
+. This license, the Lesser General Public License, applies to some
+. specially designated software packages--typically libraries--of the
+. Free Software Foundation and other authors who decide to use it. You
+. can use it too, but we suggest you first think carefully about whether
+. this license or the ordinary General Public License is the better
+. strategy to use in any particular case, based on the explanations below.
+.
+. When we speak of free software, we are referring to freedom of use,
+. not price. Our General Public Licenses are designed to make sure that
+. you have the freedom to distribute copies of free software (and charge
+. for this service if you wish); that you receive source code or can get
+. it if you want it; that you can change the software and use pieces of
+. it in new free programs; and that you are informed that you can do
+. these things.
+.
+. To protect your rights, we need to make restrictions that forbid
+. distributors to deny you these rights or to ask you to surrender these
+. rights. These restrictions translate to certain responsibilities for
+. you if you distribute copies of the library or if you modify it.
+.
+. For example, if you distribute copies of the library, whether gratis
+. or for a fee, you must give the recipients all the rights that we gave
+. you. You must make sure that they, too, receive or can get the source
+. code. If you link other code with the library, you must provide
+. complete object files to the recipients, so that they can relink them
+. with the library after making changes to the library and recompiling
+. it. And you must show them these terms so they know their rights.
+.
+. We protect your rights with a two-step method: (1) we copyright the
+. library, and (2) we offer you this license, which gives you legal
+. permission to copy, distribute and/or modify the library.
+.
+. To protect each distributor, we want to make it very clear that
+. there is no warranty for the free library. Also, if the library is
+. modified by someone else and passed on, the recipients should know
+. that what they have is not the original version, so that the original
+. author's reputation will not be affected by problems that might be
+. introduced by others.
+.
+. Finally, software patents pose a constant threat to the existence of
+. any free program. We wish to make sure that a company cannot
+. effectively restrict the users of a free program by obtaining a
+. restrictive license from a patent holder. Therefore, we insist that
+. any patent license obtained for a version of the library must be
+. consistent with the full freedom of use specified in this license.
+.
+. Most GNU software, including some libraries, is covered by the
+. ordinary GNU General Public License. This license, the GNU Lesser
+. General Public License, applies to certain designated libraries, and
+. is quite different from the ordinary General Public License. We use
+. this license for certain libraries in order to permit linking those
+. libraries into non-free programs.
+.
+. When a program is linked with a library, whether statically or using
+. a shared library, the combination of the two is legally speaking a
+. combined work, a derivative of the original library. The ordinary
+. General Public License therefore permits such linking only if the
+. entire combination fits its criteria of freedom. The Lesser General
+. Public License permits more lax criteria for linking other code with
+. the library.
+.
+. We call this license the "Lesser" General Public License because it
+. does Less to protect the user's freedom than the ordinary General
+. Public License. It also provides other free software developers Less
+. of an advantage over competing non-free programs. These disadvantages
+. are the reason we use the ordinary General Public License for many
+. libraries. However, the Lesser license provides advantages in certain
+. special circumstances.
+.
+. For example, on rare occasions, there may be a special need to
+. encourage the widest possible use of a certain library, so that it becomes
+. a de-facto standard. To achieve this, non-free programs must be
+. allowed to use the library. A more frequent case is that a free
+. library does the same job as widely used non-free libraries. In this
+. case, there is little to gain by limiting the free library to free
+. software only, so we use the Lesser General Public License.
+.
+. In other cases, permission to use a particular library in non-free
+. programs enables a greater number of people to use a large body of
+. free software. For example, permission to use the GNU C Library in
+. non-free programs enables many more people to use the whole GNU
+. operating system, as well as its variant, the GNU/Linux operating
+. system.
+.
+. Although the Lesser General Public License is Less protective of the
+. users' freedom, it does ensure that the user of a program that is
+. linked with the Library has the freedom and the wherewithal to run
+. that program using a modified version of the Library.
+.
+. The precise terms and conditions for copying, distribution and
+. modification follow. Pay close attention to the difference between a
+. "work based on the library" and a "work that uses the library". The
+. former contains code derived from the library, whereas the latter must
+. be combined with the library in order to run.
+.
+. GNU LESSER GENERAL PUBLIC LICENSE
+. TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+.
+. 0. This License Agreement applies to any software library or other
+. program which contains a notice placed by the copyright holder or
+. other authorized party saying it may be distributed under the terms of
+. this Lesser General Public License (also called "this License").
+. Each licensee is addressed as "you".
+.
+. A "library" means a collection of software functions and/or data
+. prepared so as to be conveniently linked with application programs
+. (which use some of those functions and data) to form executables.
+.
+. The "Library", below, refers to any such software library or work
+. which has been distributed under these terms. A "work based on the
+. Library" means either the Library or any derivative work under
+. copyright law: that is to say, a work containing the Library or a
+. portion of it, either verbatim or with modifications and/or translated
+. straightforwardly into another language. (Hereinafter, translation is
+. included without limitation in the term "modification".)
+.
+. "Source code" for a work means the preferred form of the work for
+. making modifications to it. For a library, complete source code means
+. all the source code for all modules it contains, plus any associated
+. interface definition files, plus the scripts used to control compilation
+. and installation of the library.
+.
+. Activities other than copying, distribution and modification are not
+. covered by this License; they are outside its scope. The act of
+. running a program using the Library is not restricted, and output from
+. such a program is covered only if its contents constitute a work based
+. on the Library (independent of the use of the Library in a tool for
+. writing it). Whether that is true depends on what the Library does
+. and what the program that uses the Library does.
+.
+. 1. You may copy and distribute verbatim copies of the Library's
+. complete source code as you receive it, in any medium, provided that
+. you conspicuously and appropriately publish on each copy an
+. appropriate copyright notice and disclaimer of warranty; keep intact
+. all the notices that refer to this License and to the absence of any
+. warranty; and distribute a copy of this License along with the
+. Library.
+.
+. You may charge a fee for the physical act of transferring a copy,
+. and you may at your option offer warranty protection in exchange for a
+. fee.
+.
+. 2. You may modify your copy or copies of the Library or any portion
+. of it, thus forming a work based on the Library, and copy and
+. distribute such modifications or work under the terms of Section 1
+. above, provided that you also meet all of these conditions:
+.
+. a) The modified work must itself be a software library.
+.
+. b) You must cause the files modified to carry prominent notices
+. stating that you changed the files and the date of any change.
+.
+. c) You must cause the whole of the work to be licensed at no
+. charge to all third parties under the terms of this License.
+.
+. d) If a facility in the modified Library refers to a function or a
+. table of data to be supplied by an application program that uses
+. the facility, other than as an argument passed when the facility
+. is invoked, then you must make a good faith effort to ensure that,
+. in the event an application does not supply such function or
+. table, the facility still operates, and performs whatever part of
+. its purpose remains meaningful.
+.
+. (For example, a function in a library to compute square roots has
+. a purpose that is entirely well-defined independent of the
+. application. Therefore, Subsection 2d requires that any
+. application-supplied function or table used by this function must
+. be optional: if the application does not supply it, the square
+. root function must still compute square roots.)
+.
+. These requirements apply to the modified work as a whole. If
+. identifiable sections of that work are not derived from the Library,
+. and can be reasonably considered independent and separate works in
+. themselves, then this License, and its terms, do not apply to those
+. sections when you distribute them as separate works. But when you
+. distribute the same sections as part of a whole which is a work based
+. on the Library, the distribution of the whole must be on the terms of
+. this License, whose permissions for other licensees extend to the
+. entire whole, and thus to each and every part regardless of who wrote
+. it.
+.
+. Thus, it is not the intent of this section to claim rights or contest
+. your rights to work written entirely by you; rather, the intent is to
+. exercise the right to control the distribution of derivative or
+. collective works based on the Library.
+.
+. In addition, mere aggregation of another work not based on the Library
+. with the Library (or with a work based on the Library) on a volume of
+. a storage or distribution medium does not bring the other work under
+. the scope of this License.
+.
+. 3. You may opt to apply the terms of the ordinary GNU General Public
+. License instead of this License to a given copy of the Library. To do
+. this, you must alter all the notices that refer to this License, so
+. that they refer to the ordinary GNU General Public License, version 2,
+. instead of to this License. (If a newer version than version 2 of the
+. ordinary GNU General Public License has appeared, then you can specify
+. that version instead if you wish.) Do not make any other change in
+. these notices.
+.
+. Once this change is made in a given copy, it is irreversible for
+. that copy, so the ordinary GNU General Public License applies to all
+. subsequent copies and derivative works made from that copy.
+.
+. This option is useful when you wish to copy part of the code of
+. the Library into a program that is not a library.
+.
+. 4. You may copy and distribute the Library (or a portion or
+. derivative of it, under Section 2) in object code or executable form
+. under the terms of Sections 1 and 2 above provided that you accompany
+. it with the complete corresponding machine-readable source code, which
+. must be distributed under the terms of Sections 1 and 2 above on a
+. medium customarily used for software interchange.
+.
+. If distribution of object code is made by offering access to copy
+. from a designated place, then offering equivalent access to copy the
+. source code from the same place satisfies the requirement to
+. distribute the source code, even though third parties are not
+. compelled to copy the source along with the object code.
+.
+. 5. A program that contains no derivative of any portion of the
+. Library, but is designed to work with the Library by being compiled or
+. linked with it, is called a "work that uses the Library". Such a
+. work, in isolation, is not a derivative work of the Library, and
+. therefore falls outside the scope of this License.
+.
+. However, linking a "work that uses the Library" with the Library
+. creates an executable that is a derivative of the Library (because it
+. contains portions of the Library), rather than a "work that uses the
+. library". The executable is therefore covered by this License.
+. Section 6 states terms for distribution of such executables.
+.
+. When a "work that uses the Library" uses material from a header file
+. that is part of the Library, the object code for the work may be a
+. derivative work of the Library even though the source code is not.
+. Whether this is true is especially significant if the work can be
+. linked without the Library, or if the work is itself a library. The
+. threshold for this to be true is not precisely defined by law.
+.
+. If such an object file uses only numerical parameters, data
+. structure layouts and accessors, and small macros and small inline
+. functions (ten lines or less in length), then the use of the object
+. file is unrestricted, regardless of whether it is legally a derivative
+. work. (Executables containing this object code plus portions of the
+. Library will still fall under Section 6.)
+.
+. Otherwise, if the work is a derivative of the Library, you may
+. distribute the object code for the work under the terms of Section 6.
+. Any executables containing that work also fall under Section 6,
+. whether or not they are linked directly with the Library itself.
+.
+. 6. As an exception to the Sections above, you may also combine or
+. link a "work that uses the Library" with the Library to produce a
+. work containing portions of the Library, and distribute that work
+. under terms of your choice, provided that the terms permit
+. modification of the work for the customer's own use and reverse
+. engineering for debugging such modifications.
+.
+. You must give prominent notice with each copy of the work that the
+. Library is used in it and that the Library and its use are covered by
+. this License. You must supply a copy of this License. If the work
+. during execution displays copyright notices, you must include the
+. copyright notice for the Library among them, as well as a reference
+. directing the user to the copy of this License. Also, you must do one
+. of these things:
+.
+. a) Accompany the work with the complete corresponding
+. machine-readable source code for the Library including whatever
+. changes were used in the work (which must be distributed under
+. Sections 1 and 2 above); and, if the work is an executable linked
+. with the Library, with the complete machine-readable "work that
+. uses the Library", as object code and/or source code, so that the
+. user can modify the Library and then relink to produce a modified
+. executable containing the modified Library. (It is understood
+. that the user who changes the contents of definitions files in the
+. Library will not necessarily be able to recompile the application
+. to use the modified definitions.)
+.
+. b) Use a suitable shared library mechanism for linking with the
+. Library. A suitable mechanism is one that (1) uses at run time a
+. copy of the library already present on the user's computer system,
+. rather than copying library functions into the executable, and (2)
+. will operate properly with a modified version of the library, if
+. the user installs one, as long as the modified version is
+. interface-compatible with the version that the work was made with.
+.
+. c) Accompany the work with a written offer, valid for at
+. least three years, to give the same user the materials
+. specified in Subsection 6a, above, for a charge no more
+. than the cost of performing this distribution.
+.
+. d) If distribution of the work is made by offering access to copy
+. from a designated place, offer equivalent access to copy the above
+. specified materials from the same place.
+.
+. e) Verify that the user has already received a copy of these
+. materials or that you have already sent this user a copy.
+.
+. For an executable, the required form of the "work that uses the
+. Library" must include any data and utility programs needed for
+. reproducing the executable from it. However, as a special exception,
+. the materials to be distributed need not include anything that is
+. normally distributed (in either source or binary form) with the major
+. components (compiler, kernel, and so on) of the operating system on
+. which the executable runs, unless that component itself accompanies
+. the executable.
+.
+. It may happen that this requirement contradicts the license
+. restrictions of other proprietary libraries that do not normally
+. accompany the operating system. Such a contradiction means you cannot
+. use both them and the Library together in an executable that you
+. distribute.
+.
+. 7. You may place library facilities that are a work based on the
+. Library side-by-side in a single library together with other library
+. facilities not covered by this License, and distribute such a combined
+. library, provided that the separate distribution of the work based on
+. the Library and of the other library facilities is otherwise
+. permitted, and provided that you do these two things:
+.
+. a) Accompany the combined library with a copy of the same work
+. based on the Library, uncombined with any other library
+. facilities. This must be distributed under the terms of the
+. Sections above.
+.
+. b) Give prominent notice with the combined library of the fact
+. that part of it is a work based on the Library, and explaining
+. where to find the accompanying uncombined form of the same work.
+.
+. 8. You may not copy, modify, sublicense, link with, or distribute
+. the Library except as expressly provided under this License. Any
+. attempt otherwise to copy, modify, sublicense, link with, or
+. distribute the Library is void, and will automatically terminate your
+. rights under this License. However, parties who have received copies,
+. or rights, from you under this License will not have their licenses
+. terminated so long as such parties remain in full compliance.
+.
+. 9. You are not required to accept this License, since you have not
+. signed it. However, nothing else grants you permission to modify or
+. distribute the Library or its derivative works. These actions are
+. prohibited by law if you do not accept this License. Therefore, by
+. modifying or distributing the Library (or any work based on the
+. Library), you indicate your acceptance of this License to do so, and
+. all its terms and conditions for copying, distributing or modifying
+. the Library or works based on it.
+.
+. 10. Each time you redistribute the Library (or any work based on the
+. Library), the recipient automatically receives a license from the
+. original licensor to copy, distribute, link with or modify the Library
+. subject to these terms and conditions. You may not impose any further
+. restrictions on the recipients' exercise of the rights granted herein.
+. You are not responsible for enforcing compliance by third parties with
+. this License.
+.
+. 11. If, as a consequence of a court judgment or allegation of patent
+. infringement or for any other reason (not limited to patent issues),
+. conditions are imposed on you (whether by court order, agreement or
+. otherwise) that contradict the conditions of this License, they do not
+. excuse you from the conditions of this License. If you cannot
+. distribute so as to satisfy simultaneously your obligations under this
+. License and any other pertinent obligations, then as a consequence you
+. may not distribute the Library at all. For example, if a patent
+. license would not permit royalty-free redistribution of the Library by
+. all those who receive copies directly or indirectly through you, then
+. the only way you could satisfy both it and this License would be to
+. refrain entirely from distribution of the Library.
+.
+. If any portion of this section is held invalid or unenforceable under any
+. particular circumstance, the balance of the section is intended to apply,
+. and the section as a whole is intended to apply in other circumstances.
+.
+. It is not the purpose of this section to induce you to infringe any
+. patents or other property right claims or to contest validity of any
+. such claims; this section has the sole purpose of protecting the
+. integrity of the free software distribution system which is
+. implemented by public license practices. Many people have made
+. generous contributions to the wide range of software distributed
+. through that system in reliance on consistent application of that
+. system; it is up to the author/donor to decide if he or she is willing
+. to distribute software through any other system and a licensee cannot
+. impose that choice.
+.
+. This section is intended to make thoroughly clear what is believed to
+. be a consequence of the rest of this License.
+.
+. 12. If the distribution and/or use of the Library is restricted in
+. certain countries either by patents or by copyrighted interfaces, the
+. original copyright holder who places the Library under this License may add
+. an explicit geographical distribution limitation excluding those countries,
+. so that distribution is permitted only in or among countries not thus
+. excluded. In such case, this License incorporates the limitation as if
+. written in the body of this License.
+.
+. 13. The Free Software Foundation may publish revised and/or new
+. versions of the Lesser General Public License from time to time.
+. Such new versions will be similar in spirit to the present version,
+. but may differ in detail to address new problems or concerns.
+.
+. Each version is given a distinguishing version number. If the Library
+. specifies a version number of this License which applies to it and
+. "any later version", you have the option of following the terms and
+. conditions either of that version or of any later version published by
+. the Free Software Foundation. If the Library does not specify a
+. license version number, you may choose any version ever published by
+. the Free Software Foundation.
+.
+. 14. If you wish to incorporate parts of the Library into other free
+. programs whose distribution conditions are incompatible with these,
+. write to the author to ask for permission. For software which is
+. copyrighted by the Free Software Foundation, write to the Free
+. Software Foundation; we sometimes make exceptions for this. Our
+. decision will be guided by the two goals of preserving the free status
+. of all derivatives of our free software and of promoting the sharing
+. and reuse of software generally.
+.
+. NO WARRANTY
+.
+. 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+. WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+. OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+. KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+. PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+. LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+. THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+.
+. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+. WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+. AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+. FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+. CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+. LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+. RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+. FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+. SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+. DAMAGES.
+.
+. END OF TERMS AND CONDITIONS
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/37/MANIFEST-20051228065045-05f3d0009771a020.weave
+++ urlgrabber-2.9.7/.bzr/weaves/37/MANIFEST-20051228065045-05f3d0009771a020.weave
@@ -0,0 +1,31 @@
+# bzr weave file v5
+i
+1 de69469602977898728adbaee3924df811cf7dc7
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+{ 0
+. ChangeLog
+. LICENSE
+. MANIFEST
+. README
+. TODO
+. setup.py
+. scripts/urlgrabber
+. test/base_test_code.py
+. test/grabberperf.py
+. test/munittest.py
+. test/runtests.py
+. test/test_byterange.py
+. test/test_grabber.py
+. test/test_keepalive.py
+. test/test_mirror.py
+. test/threading/batchgrabber.py
+. urlgrabber/__init__.py
+. urlgrabber/byterange.py
+. urlgrabber/grabber.py
+. urlgrabber/keepalive.py
+. urlgrabber/mirror.py
+. urlgrabber/progress.py
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/03/PKGINFO-20051228065045-7a5a1644923516b9.weave
+++ urlgrabber-2.9.7/.bzr/weaves/03/PKGINFO-20051228065045-7a5a1644923516b9.weave
@@ -0,0 +1,66 @@
+# bzr weave file v5
+i
+1 4d27580420328b1d8d6ff9be9c3e919cc9aa40b9
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+i 0
+1 bb2a4bf91024e2ac0829c026472d64f568da193d
+n wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
+
+w
+{ 0
+. Metadata-Version: 1.0
+. Name: urlgrabber
+[ 1
+. Version: 2.9.6
+] 1
+{ 1
+. Version: 2.9.7
+}
+. Summary: A high-level cross-protocol url-grabber
+. Home-page: http://linux.duke.edu/projects/urlgrabber/
+. Author: Michael D. Stenner, Ryan Tomayko
+. Author-email: mstenner@linux.duke.edu, rtomayko@naeblis.cx
+. License: LGPL
+. Description: A high-level cross-protocol url-grabber.
+.
+. Using urlgrabber, data can be fetched in three basic ways:
+.
+. urlgrab(url) copy the file to the local filesystem
+. urlopen(url) open the remote file and return a file object
+. (like urllib2.urlopen)
+. urlread(url) return the contents of the file as a string
+.
+. When using these functions (or methods), urlgrabber supports the
+. following features:
+.
+. * identical behavior for http://, ftp://, and file:// urls
+. * http keepalive - faster downloads of many files by using
+. only a single connection
+. * byte ranges - fetch only a portion of the file
+. * reget - for a urlgrab, resume a partial download
+. * progress meters - the ability to report download progress
+. automatically, even when using urlopen!
+. * throttling - restrict bandwidth usage
+. * retries - automatically retry a download if it fails. The
+. number of retries and failure types are configurable.
+. * authenticated server access for http and ftp
+. * proxy support - support for authenticated http and ftp proxies
+. * mirror groups - treat a list of mirrors as a single source,
+. automatically switching mirrors if there is a failure.
+.
+. Platform: UNKNOWN
+. Classifier: Development Status :: 4 - Beta
+. Classifier: Environment :: Console
+. Classifier: Environment :: Web Environment
+. Classifier: Intended Audience :: Developers
+. Classifier: Intended Audience :: System Administrators
+. Classifier: License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)
+. Classifier: Operating System :: POSIX
+. Classifier: Operating System :: POSIX :: Linux
+. Classifier: Programming Language :: Python
+. Classifier: Topic :: Internet :: File Transfer Protocol (FTP)
+. Classifier: Topic :: Internet :: WWW/HTTP
+. Classifier: Topic :: Software Development :: Libraries :: Python Modules
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/bb/README-20051228065045-d9e5d605bbacc2f1.weave
+++ urlgrabber-2.9.7/.bzr/weaves/bb/README-20051228065045-d9e5d605bbacc2f1.weave
@@ -0,0 +1,40 @@
+# bzr weave file v5
+i
+1 ebb80dc2bfbf9c4bef9c6bcec58461552d87730a
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+{ 0
+. urlgrabber -- A high-level cross-protocol url-grabber
+.
+. INSTALLATION INSTRUCTIONS
+.
+. If you want to install urlgrabber on your system, simply open the package
+. and run:
+.
+. python setup.py install
+.
+. Take a look at the install options by doing
+.
+. python setup.py install --help
+.
+. With no arguments, distutils will add all modules to a 'urlgrabber'
+. package under your python site-packages directory.
+.
+. You can build rpms by running
+.
+. python setup.py bdist_rpm
+.
+. The rpms (both source and "binary") will be specific to the current
+. distrubution/version and may not be portable to others. This is
+. because they will be built for the currently installed python.
+.
+. keepalive.py and byterange.py are generic urllib2 extension modules and
+. can be used to add keepalive and range support to any urllib2
+. application.
+.
+. As of version 2.9.x, urlgrabber is no longer tested with python
+. versions less than 2.2. It will probably work with 2.0, but will
+. almost certainly NOT work under prior python versions.
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/ac/TODO-20051228065045-01a634a623a4f8bc.weave
+++ urlgrabber-2.9.7/.bzr/weaves/ac/TODO-20051228065045-01a634a623a4f8bc.weave
@@ -0,0 +1,77 @@
+# bzr weave file v5
+i
+1 f9a3e8317125c922e66c55f843e39aee264f64d2
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+i 0
+1 66439d21f3a2f5180f24a99233a09808c9a46e89
+n wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
+
+w
+{ 0
+. ALPHA 2:
+.
+. * web page
+. - better examples page
+.
+. * threading/batch
+. - (rt) propose an interface for threaded batch downloads
+. - (mds) design a new progress-meter interface for threaded
+. multi-file downloads
+. - (rt) look at CacheFTPHandler and its implications for batch mode
+. and byte-ranges/reget
+.
+. * progress meter stuff
+. - support for retrying a file (in a MirrorGroup, for example)
+. - failure support (done?)
+. - support for when we have less information (no sizes, etc)
+. - check compatibility with gui interfaces
+. - starting a download with some parts already read (with reget,
+. for example)
+.
+[ 1
+. * Add decent debugging code and a URLGRABBER_DEBUG environment
+. variable so that users of miscellaneous applications can get the
+. debugging output
+.
+. * Have a plan for KeyboardInterupt exception handling. This is driven by
+. recent chatter on the yum list about Ctrl-C handling. Seth suggests
+. Ctrl-C as a "Skip to Next Mirror" trigger when processing MirrorGroups.
+.
+] 1
+. * look at making the 'check_timestamp' reget mode work with ftp.
+. Currently, we NEVER get a timestamp back, so we can't compare.
+. We'll probably need to subclass/replace either the urllib2 FTP handler
+. or the ftplib FTP object (or both, but I doubt it). It may or may not
+. be worth it just for this one mode of reget. It fails safely - by
+. getting the entire file.
+[ 1
+.
+. * ipv6
+] 1
+.
+. * cache dns lookups -- for a possible approach, see
+. https://lists.dulug.duke.edu/pipermail/yum-devel/2004-March/000136.html
+.
+. Misc/Maybe:
+.
+. * BatchURLGrabber/BatchMirrorGroup for concurrent downloads and possibly to
+. handle forking into secure/setuid sandbox.
+.
+. * Consider adding a progress_meter implementation that can be used in
+. concurrent download situations (I have some ideas about this -mds)
+.
+. * Consider using CacheFTPHandler instead of FTPHandler in byterange.py.
+. CacheFTPHandler reuses connections but this may lead to problems with
+. ranges. I've tested CacheFTPHandler with ranges using vsftpd as a
+. server and everything works fine but this needs more exhaustive tests
+. or a fallback mechanism. Also, CacheFTPHandler breaks with multiple
+. threads.
+.
+. * Consider some statistics tracking so that urlgrabber can record the
+. speed/reliability of different servers. This could then be used by
+. the mirror code for choosing optimal servers (slick, eh?)
+.
+. * check SSL certs. This may require PyOpenSSL.
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/d5/scripts-20051228065045-0a21bb800de0fa5e.weave
+++ urlgrabber-2.9.7/.bzr/weaves/d5/scripts-20051228065045-0a21bb800de0fa5e.weave
@@ -0,0 +1,7 @@
+# bzr weave file v5
+i
+1 da39a3ee5e6b4b0d3255bfef95601890afd80709
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/67/urlgrabber-20051228065045-3092dfe3fdda0fbe.weave
+++ urlgrabber-2.9.7/.bzr/weaves/67/urlgrabber-20051228065045-3092dfe3fdda0fbe.weave
@@ -0,0 +1,133 @@
+# bzr weave file v5
+i
+1 c7a058b6afc80056e23b37a11c866045a5c8fc36
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+{ 0
+. #!/usr/bin/python -t
+.
+. # This library is free software; you can redistribute it and/or
+. # modify it under the terms of the GNU Lesser General Public
+. # License as published by the Free Software Foundation; either
+. # version 2.1 of the License, or (at your option) any later version.
+. #
+. # This library is distributed in the hope that it will be useful,
+. # but WITHOUT ANY WARRANTY; without even the implied warranty of
+. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+. # Lesser General Public License for more details.
+. #
+. # You should have received a copy of the GNU Lesser General Public
+. # License along with this library; if not, write to the
+. # Free Software Foundation, Inc.,
+. # 59 Temple Place, Suite 330,
+. # Boston, MA 02111-1307 USA
+.
+. # This file is part of urlgrabber, a high-level cross-protocol url-grabber
+. # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+.
+. """Usage: urlgrabber [OPTIONS] URL [FILE]
+.
+. OPTIONS
+.
+. --copy-local ignored except for file:// urls, in which case
+. it specifies whether urlgrab should still make
+. a copy of the file, or simply point to the
+. existing copy.
+. --throttle=NUMBER if it's an int, it's the bytes/second throttle
+. limit. If it's a float, it is first multiplied
+. by bandwidth. If throttle == 0, throttling is
+. disabled. If None, the module-level default
+. (which can be set with set_throttle) is used.
+. --bandwidth=NUMBER the nominal max bandwidth in bytes/second. If
+. throttle is a float and bandwidth == 0,
+. throttling is disabled. If None, the
+. module-level default (which can be set with
+. set_bandwidth) is used.
+. --range=RANGE a tuple of the form first_byte,last_byte
+. describing a byte range to retrieve. Either or
+. both of the values may be specified. If
+. first_byte is None, byte offset 0 is assumed.
+. If last_byte is None, the last byte available
+. is assumed. Note that both first and last_byte
+. values are inclusive so a range of (10,11)
+. would return the 10th and 11th bytes of the
+. resource.
+. --user-agent=STR the user-agent string provide if the url is HTTP.
+.
+. RETRY OPTIONS
+.
+. --retry=NUMBER the number of times to retry the grab before
+. bailing. If this is zero, it will retry
+. forever. This was intentional... really, it was
+. :). If this value is not supplied or is supplied
+. but is None retrying does not occur.
+. --retrycodes a sequence of errorcodes (values of e.errno) for
+. which it should retry. See the doc on
+. URLGrabError for more details on this. retrycodes
+. defaults to -1,2,4,5,6,7 if not specified
+. explicitly.
+. """
+.
+. # $Id: urlgrabber,v 1.4 2004/09/07 21:19:54 mstenner Exp $
+.
+. import sys
+. import urlgrabber.grabber
+. from urlgrabber.grabber import urlgrab
+.
+. DEBUG=0
+.
+. def main():
+. kwargs = {}
+. url = None
+. file = None
+. proxies = {}
+.
+. for arg in sys.argv[1:]:
+. if arg in ('--help','-h','-?'):
+. print __doc__
+. sys.exit(0)
+. elif arg in ('--debug', '-d'):
+. global DEBUG
+. DEBUG = 1
+. elif arg.startswith('--'):
+. ls = arg[2:].split('=')
+. a,v = (ls[0],len(ls) > 1 and ls[1] or 1)
+. a = a.replace('-','_')
+. if v is None: v = 1
+. if a in ('retrycodes','range'):
+. v = v.split(',')
+. if a.endswith('_proxy'):
+. proxies[ a[0:a.find('_')] ] = v
+. continue
+. if DEBUG: print a, '=', v
+. kwargs[a] = v
+. elif not url:
+. url = arg
+. elif not file:
+. file = arg
+. else:
+. print "Bad usage. Try %s --help" % (sys.argv[0],)
+. sys.exit(99)
+. if url is None:
+. print "Bad usage. Try %s --help" % (sys.argv[0],)
+. sys.exit(99)
+. if len(proxies):
+. kwargs['proxies'] = proxies
+. if DEBUG:
+. print 'kwargs: ', kwargs
+. print 'URL: ', url
+. print 'FILE: ', file
+.
+. try: from progress import text_progress_meter
+. except ImportError, e: pass
+. else: kwargs['progress_obj'] = text_progress_meter()
+.
+. urlgrabber.grabber.DEBUG = DEBUG
+. filename = urlgrab(url,filename=file,**kwargs)
+. print "file written to %s" % (filename,)
+.
+. if __name__ == '__main__':
+. main()
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/9b/setup.py-20051228065045-a04615cb059b9d94.weave
+++ urlgrabber-2.9.7/.bzr/weaves/9b/setup.py-20051228065045-a04615cb059b9d94.weave
@@ -0,0 +1,54 @@
+# bzr weave file v5
+i
+1 f3f5d15a08434e9ce60927b244c704caf9250f38
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+{ 0
+. # urlgrabber distutils setup
+. import re as _re
+. import urlgrabber as _urlgrabber
+.
+. name = "urlgrabber"
+. description = "A high-level cross-protocol url-grabber"
+. long_description = _urlgrabber.__doc__
+. license = "LGPL"
+. version = _urlgrabber.__version__
+. _authors = _re.split(r',\s+', _urlgrabber.__author__)
+. author = ', '.join([_re.sub(r'\s+<.*', r'', _) for _ in _authors])
+. author_email = ', '.join([_re.sub(r'(^.*<)|(>.*$)', r'', _) for _ in _authors])
+. url = _urlgrabber.__url__
+.
+. packages = ['urlgrabber']
+. package_dir = {'urlgrabber':'urlgrabber'}
+. scripts = ['scripts/urlgrabber']
+. data_files = [('share/doc/' + name + '-' + version,
+. ['README','LICENSE', 'TODO', 'ChangeLog'])]
+. options = { 'clean' : { 'all' : 1 } }
+. classifiers = [
+. 'Development Status :: 4 - Beta',
+. 'Environment :: Console',
+. 'Environment :: Web Environment',
+. 'Intended Audience :: Developers',
+. 'Intended Audience :: System Administrators',
+. 'License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)',
+. 'Operating System :: POSIX',
+. 'Operating System :: POSIX :: Linux',
+. 'Programming Language :: Python',
+. 'Topic :: Internet :: File Transfer Protocol (FTP)',
+. 'Topic :: Internet :: WWW/HTTP',
+. 'Topic :: Software Development :: Libraries :: Python Modules'
+. ]
+.
+. # load up distutils
+. if __name__ == '__main__':
+. config = globals().copy()
+. keys = config.keys()
+. for k in keys:
+. #print '%-20s -> %s' % (k, config[k])
+. if k.startswith('_'): del config[k]
+.
+. from distutils.core import setup
+. setup(**config)
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/05/test-20051228065045-a12c62b7a4574c20.weave
+++ urlgrabber-2.9.7/.bzr/weaves/05/test-20051228065045-a12c62b7a4574c20.weave
@@ -0,0 +1,7 @@
+# bzr weave file v5
+i
+1 da39a3ee5e6b4b0d3255bfef95601890afd80709
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/aa/base_test_code.py-20051228065045-46e61ae2d5c72abf.weave
+++ urlgrabber-2.9.7/.bzr/weaves/aa/base_test_code.py-20051228065045-46e61ae2d5c72abf.weave
@@ -0,0 +1,42 @@
+# bzr weave file v5
+i
+1 9c84f2ef28881455db4d456e2ed04eea68672dd0
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+{ 0
+. from munittest import *
+.
+. base_http = 'http://www.linux.duke.edu/projects/urlgrabber/test/'
+. base_ftp = 'ftp://localhost/test/'
+.
+. # set to a proftp server only. we're working around a couple of
+. # bugs in their implementation in byterange.py.
+. base_proftp = 'ftp://localhost/test/'
+.
+. reference_data = ''.join( [str(i)+'\n' for i in range(20000) ] )
+. ref_http = base_http + 'reference'
+. ref_ftp = base_ftp + 'reference'
+. ref_proftp = base_proftp + 'reference'
+. short_reference_data = ' '.join( [str(i) for i in range(10) ] )
+. short_ref_http = base_http + 'short_reference'
+. short_ref_ftp = base_ftp + 'short_reference'
+.
+. ref_200 = ref_http
+. ref_404 = base_http + 'nonexistent_file'
+. ref_403 = base_http + 'mirror/broken/'
+.
+. base_mirror_url = base_http + 'mirror/'
+. good_mirrors = ['m1', 'm2', 'm3']
+. mirror_files = ['test1.txt', 'test2.txt']
+. bad_mirrors = ['broken']
+. bad_mirror_files = ['broken.txt']
+.
+. proxy_proto = 'http'
+. proxy_host = 'localhost'
+. proxy_port = 8888
+. proxy_user = 'proxyuser'
+. good_proxy_pass = 'proxypass'
+. bad_proxy_pass = 'badproxypass'
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/84/grabberperf.py-20051228065045-b785ab798deaed31.weave
+++ urlgrabber-2.9.7/.bzr/weaves/84/grabberperf.py-20051228065045-b785ab798deaed31.weave
@@ -0,0 +1,148 @@
+# bzr weave file v5
+i
+1 908c2e723bc3da9e1b852e8740b6c05a7b1afd12
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+{ 0
+. #!/usr/bin/python -t
+.
+. # This library is free software; you can redistribute it and/or
+. # modify it under the terms of the GNU Lesser General Public
+. # License as published by the Free Software Foundation; either
+. # version 2.1 of the License, or (at your option) any later version.
+. #
+. # This library is distributed in the hope that it will be useful,
+. # but WITHOUT ANY WARRANTY; without even the implied warranty of
+. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+. # Lesser General Public License for more details.
+. #
+. # You should have received a copy of the GNU Lesser General Public
+. # License along with this library; if not, write to the
+. # Free Software Foundation, Inc.,
+. # 59 Temple Place, Suite 330,
+. # Boston, MA 02111-1307 USA
+.
+. # This file is part of urlgrabber, a high-level cross-protocol url-grabber
+. # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+.
+. import sys
+. import os
+. from os.path import dirname, join as joinpath
+. import tempfile
+. import time
+.
+. import urlgrabber.grabber as grabber
+. from urlgrabber.grabber import URLGrabber, urlgrab, urlopen, urlread
+. from urlgrabber.progress import text_progress_meter
+.
+. tempsrc = '/tmp/ug-test-src'
+. tempdst = '/tmp/ug-test-dst'
+.
+. # this isn't used but forces a proxy handler to be
+. # added when creating the urllib2 opener.
+. proxies = { 'http' : 'http://localhost' }
+. DEBUG=0
+.
+. def main():
+. speedtest(1024) # 1KB
+. speedtest(10 * 1024) # 10 KB
+. speedtest(100 * 1024) # 100 KB
+. speedtest(1000 * 1024) # 1,000 KB (almost 1MB)
+. #speedtest(10000 * 1024) # 10,000 KB (almost 10MB)
+. # remove temp files
+. os.unlink(tempsrc)
+. os.unlink(tempdst)
+.
+. def setuptemp(size):
+. if DEBUG: print 'writing %d KB to temporary file (%s).' % (size / 1024, tempsrc)
+. file = open(tempsrc, 'w', 1024)
+. chars = '0123456789'
+. for i in range(size):
+. file.write(chars[i % 10])
+. file.flush()
+. file.close()
+.
+. def speedtest(size):
+. setuptemp(size)
+. full_times = []
+. raw_times = []
+. none_times = []
+. throttle = 2**40 # throttle to 1 TB/s :)
+.
+. try:
+. from urlgrabber.progress import text_progress_meter
+. except ImportError, e:
+. tpm = None
+. print 'not using progress meter'
+. else:
+. tpm = text_progress_meter(fo=open('/dev/null', 'w'))
+.
+. # to address concerns that the overhead from the progress meter
+. # and throttling slow things down, we do this little test.
+. #
+. # using this test, you get the FULL overhead of the progress
+. # meter and throttling, without the benefit: the meter is directed
+. # to /dev/null and the throttle bandwidth is set EXTREMELY high.
+. #
+. # note: it _is_ even slower to direct the progress meter to a real
+. # tty or file, but I'm just interested in the overhead from _this_
+. # module.
+.
+. # get it nicely cached before we start comparing
+. if DEBUG: print 'pre-caching'
+. for i in range(100):
+. urlgrab(tempsrc, tempdst, copy_local=1, throttle=None, proxies=proxies)
+.
+. if DEBUG: print 'running speed test.'
+. reps = 500
+. for i in range(reps):
+. if DEBUG:
+. print '\r%4i/%-4i' % (i+1, reps),
+. sys.stdout.flush()
+. t = time.time()
+. urlgrab(tempsrc, tempdst,
+. copy_local=1, progress_obj=tpm,
+. throttle=throttle, proxies=proxies)
+. full_times.append(1000 * (time.time() - t))
+.
+. t = time.time()
+. urlgrab(tempsrc, tempdst,
+. copy_local=1, progress_obj=None,
+. throttle=None, proxies=proxies)
+. raw_times.append(1000 * (time.time() - t))
+.
+. t = time.time()
+. in_fo = open(tempsrc)
+. out_fo = open(tempdst, 'wb')
+. while 1:
+. s = in_fo.read(1024 * 8)
+. if not s: break
+. out_fo.write(s)
+. in_fo.close()
+. out_fo.close()
+. none_times.append(1000 * (time.time() - t))
+.
+. if DEBUG: print '\r'
+.
+. print "%d KB Results:" % (size / 1024)
+. print_result('full', full_times)
+. print_result('raw', raw_times)
+. print_result('none', none_times)
+.
+. grabber.close_all()
+.
+. def print_result(label, result_list):
+. format = '[%4s] mean: %6.3f ms, median: %6.3f ms, ' \
+. 'min: %6.3f ms, max: %6.3f ms'
+. result_list.sort()
+. mean = 0.0
+. for i in result_list: mean += i
+. mean = mean/len(result_list)
+. median = result_list[int(len(result_list)/2)]
+. print format % (label, mean, median, result_list[0], result_list[-1])
+.
+. if __name__ == '__main__':
+. main()
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/51/munittest.py-20051228065045-087561c661caa0e5.weave
+++ urlgrabber-2.9.7/.bzr/weaves/51/munittest.py-20051228065045-087561c661caa0e5.weave
@@ -0,0 +1,943 @@
+# bzr weave file v5
+i
+1 1e61499eebe6587d5147382acdf3b3ea295c4b26
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+{ 0
+. #!/usr/bin/env python
+. """
+. This is a modified version of the unittest module has been modified by
+. Michael D. Stenner from Steve Purcell's version (revision 1.46, as
+. distributed with python 2.3.3) in the following ways:
+.
+. * the text formatting has been made much prettier by printing "nested"
+. test suites
+. * the test resulte "skip" has been added for skipping tests. A test
+. can call any of the .skip() .skipUnless(), or .skipIf()
+. methods from within the test method or the setUp method.
+. * all attributes originally named with leading "__" have been changed
+. to a single "_". This makes subclassing much easier.
+.
+. COMPATIBILITY
+.
+. It should be possible to drop this in as replacement for the
+. standard unittest module simply by doing:
+.
+. import munittest as unittest
+.
+. In fact, the reverse is ALMOST true. Test code written for this
+. module very nearly runs perfectly with the standard unittest module.
+. Exceptions are:
+.
+. * The .skip() methods will obviously not work on the standard
+. unittest. However, they will ERROR out and the error message will
+. complain about missing .skip() attributes, so it will be obvious and
+. will have the same effect as skipping.
+.
+. * the .setDescription method (or description argument) for
+. TestSuite will not work. However, setting the .description
+. attribute on a standard TestSuite instance does no harm, so if
+. need to set them manually (you're not satisfied with the
+. doc-string route) and you WANT to be compatible both ways, do
+. that :)
+.
+. DESCRIPTIONS
+.
+. Names for suites in the pretty formatting are (like the test
+. functions) slurped from the doc-strings of the corresponding object,
+. or taken from the names of those objects. This applies to both
+. TestCase-derived classes, and modules. Also, the TestSuite class
+. description can be set manually in a number of ways (all of which
+. achieve the same result):
+.
+. suite = TestSuite(test_list, 'this is the description')
+. suite.setDescription('this is the description')
+. suite.description = 'this is the description'
+.
+. Michael D. Stenner
+. 2004/03/18
+. v0.1
+. ===========================================================================
+. The original doc-string for this module follows:
+. ===========================================================================
+. Python unit testing framework, based on Erich Gamma's JUnit and Kent Beck's
+. Smalltalk testing framework.
+.
+. This module contains the core framework classes that form the basis of
+. specific test cases and suites (TestCase, TestSuite etc.), and also a
+. text-based utility class for running the tests and reporting the results
+. (TextTestRunner).
+.
+. Simple usage:
+.
+. import unittest
+.
+. class IntegerArithmenticTestCase(unittest.TestCase):
+. def testAdd(self): ## test method names begin 'test*'
+. self.assertEquals((1 + 2), 3)
+. self.assertEquals(0 + 1, 1)
+. def testMultiply(self):
+. self.assertEquals((0 * 10), 0)
+. self.assertEquals((5 * 8), 40)
+.
+. if __name__ == '__main__':
+. unittest.main()
+.
+. Further information is available in the bundled documentation, and from
+.
+. http://pyunit.sourceforge.net/
+.
+. Copyright (c) 1999, 2000, 2001 Steve Purcell
+. This module is free software, and you may redistribute it and/or modify
+. it under the same terms as Python itself, so long as this copyright message
+. and disclaimer are retained in their original form.
+.
+. IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+. SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
+. THIS CODE, EVEN IF THE AUTHOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+. DAMAGE.
+.
+. THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+. PARTICULAR PURPOSE. THE CODE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
+. AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
+. SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+. """
+.
+. # $Id: munittest.py,v 1.2 2004/03/31 01:27:24 mstenner Exp $
+.
+. import time
+. import sys
+. import traceback
+. import string
+. import os
+. import types
+.
+. ##############################################################################
+. # Exported classes and functions
+. ##############################################################################
+. __all__ = ['TestResult', 'TestCase', 'TestSuite', 'TextTestRunner',
+. 'TestLoader', 'FunctionTestCase', 'main', 'defaultTestLoader']
+.
+. # Expose obsolete functions for backwards compatability
+. __all__.extend(['getTestCaseNames', 'makeSuite', 'findTestCases'])
+.
+.
+. ##############################################################################
+. # Test framework core
+. ##############################################################################
+.
+. # All classes defined herein are 'new-style' classes, allowing use of 'super()'
+. __metaclass__ = type
+.
+. def _strclass(cls):
+. return "%s.%s" % (cls.__module__, cls.__name__)
+.
+. class TestResult:
+. """Holder for test result information.
+.
+. Test results are automatically managed by the TestCase and TestSuite
+. classes, and do not need to be explicitly manipulated by writers of tests.
+.
+. Each instance holds the total number of tests run, and collections of
+. failures and errors that occurred among those test runs. The collections
+. contain tuples of (testcase, exceptioninfo), where exceptioninfo is the
+. formatted traceback of the error that occurred.
+. """
+. def __init__(self):
+. self.failures = []
+. self.errors = []
+. self.skipped = []
+. self.testsRun = 0
+. self.shouldStop = 0
+.
+. def startTest(self, test):
+. "Called when the given test is about to be run"
+. self.testsRun = self.testsRun + 1
+.
+. def stopTest(self, test):
+. "Called when the given test has been run"
+. pass
+.
+. def startSuite(self, suite):
+. "Called when the given suite is about to be run"
+. pass
+.
+. def stopSuit(self, suite):
+. "Called when the tiven suite has been run"
+. pass
+.
+. def addError(self, test, err):
+. """Called when an error has occurred. 'err' is a tuple of values as
+. returned by sys.exc_info().
+. """
+. self.errors.append((test, self._exc_info_to_string(err)))
+.
+. def addFailure(self, test, err):
+. """Called when an error has occurred. 'err' is a tuple of values as
+. returned by sys.exc_info()."""
+. self.failures.append((test, self._exc_info_to_string(err)))
+.
+. def addSuccess(self, test):
+. "Called when a test has completed successfully"
+. pass
+.
+. def addSkip(self, test, err):
+. "Called when the test has been skipped"
+. self.skipped.append((test, self._exc_info_to_string(err)))
+.
+. def wasSuccessful(self):
+. "Tells whether or not this result was a success"
+. return len(self.failures) == len(self.errors) == 0
+.
+. def stop(self):
+. "Indicates that the tests should be aborted"
+. self.shouldStop = 1
+.
+. def _exc_info_to_string(self, err):
+. """Converts a sys.exc_info()-style tuple of values into a string."""
+. return string.join(traceback.format_exception(*err), '')
+.
+. def __repr__(self):
+. return "<%s run=%i errors=%i failures=%i>" % \
+. (_strclass(self.__class__), self.testsRun, len(self.errors),
+. len(self.failures))
+.
+.
+. class TestCase:
+. """A class whose instances are single test cases.
+.
+. By default, the test code itself should be placed in a method named
+. 'runTest'.
+.
+. If the fixture may be used for many test cases, create as
+. many test methods as are needed. When instantiating such a TestCase
+. subclass, specify in the constructor arguments the name of the test method
+. that the instance is to execute.
+.
+. Test authors should subclass TestCase for their own tests. Construction
+. and deconstruction of the test's environment ('fixture') can be
+. implemented by overriding the 'setUp' and 'tearDown' methods respectively.
+.
+. If it is necessary to override the __init__ method, the base class
+. __init__ method must always be called. It is important that subclasses
+. should not change the signature of their __init__ method, since instances
+. of the classes are instantiated automatically by parts of the framework
+. in order to be run.
+. """
+.
+. # This attribute determines which exception will be raised when
+. # the instance's assertion methods fail; test methods raising this
+. # exception will be deemed to have 'failed' rather than 'errored'
+.
+. failureException = AssertionError
+.
+. # test methods raising the following exception will be considered
+. # skipped - this is neither pass, fail, or error. it should be
+. # used when some resource needed to perform the test isn't avialable,
+. # or when a lengthy test is deliberately skipped for time.
+.
+. class skipException(Exception): pass
+.
+. # whether receiving KeyboardInterrupt during setUp or the test causes
+. # the test to be interpreted as skipped. The default is no. It's
+. # probably best to do:
+. # except KeyboardInterrupt: self.skip()
+. # inside the test method
+.
+. interrupt_skips = 0
+.
+. def __init__(self, methodName='runTest'):
+. """Create an instance of the class that will use the named test
+. method when executed. Raises a ValueError if the instance does
+. not have a method with the specified name.
+. """
+. try:
+. self._testMethodName = methodName
+. testMethod = getattr(self, methodName)
+. self._testMethodDoc = testMethod.__doc__
+. except AttributeError:
+. raise ValueError, "no such test method in %s: %s" % \
+. (self.__class__, methodName)
+.
+. def setUp(self):
+. "Hook method for setting up the test fixture before exercising it."
+. pass
+.
+. def tearDown(self):
+. "Hook method for deconstructing the test fixture after testing it."
+. pass
+.
+. def countTestCases(self):
+. return 1
+.
+. def defaultTestResult(self):
+. return TestResult()
+.
+. def shortDescription(self):
+. """Returns a one-line description of the test, or None if no
+. description has been provided.
+.
+. The default implementation of this method returns the first line of
+. the specified test method's docstring.
+. """
+. doc = self._testMethodDoc
+. return doc and string.strip(string.split(doc, "\n")[0]) or None
+.
+. def id(self):
+. return "%s.%s" % (_strclass(self.__class__), self._testMethodName)
+.
+. def __str__(self):
+. return "%s (%s)" % (self._testMethodName, _strclass(self.__class__))
+.
+. def __repr__(self):
+. return "<%s testMethod=%s>" % \
+. (_strclass(self.__class__), self._testMethodName)
+.
+. def run(self, result=None):
+. return self(result)
+.
+. def __call__(self, result=None):
+. if result is None: result = self.defaultTestResult()
+. result.startTest(self)
+. testMethod = getattr(self, self._testMethodName)
+. try:
+. try:
+. self.setUp()
+. except KeyboardInterrupt:
+. if self.interrupt_skips:
+. result.addSkip(self, self._exc_info())
+. return
+. else:
+. raise
+. except self.skipException:
+. result.addSkip(self, self._exc_info())
+. return
+. except:
+. result.addError(self, self._exc_info())
+. return
+.
+. ok = 0
+. try:
+. testMethod()
+. ok = 1
+. except self.failureException:
+. result.addFailure(self, self._exc_info())
+. except KeyboardInterrupt:
+. if self.interrupt_skips:
+. result.addSkip(self, self._exc_info())
+. return
+. else:
+. raise
+. except self.skipException:
+. result.addSkip(self, self._exc_info())
+. return
+. except:
+. result.addError(self, self._exc_info())
+.
+. try:
+. self.tearDown()
+. except KeyboardInterrupt:
+. raise
+. except:
+. result.addError(self, self._exc_info())
+. ok = 0
+. if ok: result.addSuccess(self)
+. finally:
+. result.stopTest(self)
+.
+. def debug(self):
+. """Run the test without collecting errors in a TestResult"""
+. self.setUp()
+. getattr(self, self._testMethodName)()
+. self.tearDown()
+.
+. def _exc_info(self):
+. """Return a version of sys.exc_info() with the traceback frame
+. minimised; usually the top level of the traceback frame is not
+. needed.
+. """
+. exctype, excvalue, tb = sys.exc_info()
+. if sys.platform[:4] == 'java': ## tracebacks look different in Jython
+. return (exctype, excvalue, tb)
+. newtb = tb.tb_next
+. if newtb is None:
+. return (exctype, excvalue, tb)
+. return (exctype, excvalue, newtb)
+.
+. def fail(self, msg=None):
+. """Fail immediately, with the given message."""
+. raise self.failureException, msg
+.
+. def failIf(self, expr, msg=None):
+. "Fail the test if the expression is true."
+. if expr: raise self.failureException, msg
+.
+. def failUnless(self, expr, msg=None):
+. """Fail the test unless the expression is true."""
+. if not expr: raise self.failureException, msg
+.
+. def failUnlessRaises(self, excClass, callableObj, *args, **kwargs):
+. """Fail unless an exception of class excClass is thrown
+. by callableObj when invoked with arguments args and keyword
+. arguments kwargs. If a different type of exception is
+. thrown, it will not be caught, and the test case will be
+. deemed to have suffered an error, exactly as for an
+. unexpected exception.
+. """
+. try:
+. callableObj(*args, **kwargs)
+. except excClass:
+. return
+. else:
+. if hasattr(excClass,'__name__'): excName = excClass.__name__
+. else: excName = str(excClass)
+. raise self.failureException, excName
+.
+. def failUnlessEqual(self, first, second, msg=None):
+. """Fail if the two objects are unequal as determined by the '=='
+. operator.
+. """
+. if not first == second:
+. raise self.failureException, \
+. (msg or '%s != %s' % (`first`, `second`))
+.
+. def failIfEqual(self, first, second, msg=None):
+. """Fail if the two objects are equal as determined by the '=='
+. operator.
+. """
+. if first == second:
+. raise self.failureException, \
+. (msg or '%s == %s' % (`first`, `second`))
+.
+. def failUnlessAlmostEqual(self, first, second, places=7, msg=None):
+. """Fail if the two objects are unequal as determined by their
+. difference rounded to the given number of decimal places
+. (default 7) and comparing to zero.
+.
+. Note that decimal places (from zero) is usually not the same
+. as significant digits (measured from the most signficant digit).
+. """
+. if round(second-first, places) != 0:
+. raise self.failureException, \
+. (msg or '%s != %s within %s places' % (`first`, `second`, `places` ))
+.
+. def failIfAlmostEqual(self, first, second, places=7, msg=None):
+. """Fail if the two objects are equal as determined by their
+. difference rounded to the given number of decimal places
+. (default 7) and comparing to zero.
+.
+. Note that decimal places (from zero) is usually not the same
+. as significant digits (measured from the most signficant digit).
+. """
+. if round(second-first, places) == 0:
+. raise self.failureException, \
+. (msg or '%s == %s within %s places' % (`first`, `second`, `places`))
+.
+. assertEqual = assertEquals = failUnlessEqual
+.
+. assertNotEqual = assertNotEquals = failIfEqual
+.
+. assertAlmostEqual = assertAlmostEquals = failUnlessAlmostEqual
+.
+. assertNotAlmostEqual = assertNotAlmostEquals = failIfAlmostEqual
+.
+. assertRaises = failUnlessRaises
+.
+. assert_ = failUnless
+.
+. def skip(self, msg=None):
+. """Skip the test"""
+. raise self.skipException, msg
+.
+. def skipIf(self, expr, msg=None):
+. "Skip the test if the expression is true."
+. if expr: raise self.skipException, msg
+.
+. def skipUnless(self, expr, msg=None):
+. """Skip the test unless the expression is true."""
+. if not expr: raise self.skipException, msg
+.
+.
+.
+. class TestSuite:
+. """A test suite is a composite test consisting of a number of TestCases.
+.
+. For use, create an instance of TestSuite, then add test case instances.
+. When all tests have been added, the suite can be passed to a test
+. runner, such as TextTestRunner. It will run the individual test cases
+. in the order in which they were added, aggregating the results. When
+. subclassing, do not forget to call the base class constructor.
+. """
+. def __init__(self, tests=(), description=None):
+. self._tests = []
+. self.addTests(tests)
+. self.description = description or '(no description)'
+.
+. def __repr__(self):
+. return "<%s tests=%s>" % (_strclass(self.__class__), self._tests)
+.
+. __str__ = __repr__
+.
+. def shortDescription(self):
+. return self.description
+.
+. def setDescription(self, description):
+. self.description = description
+.
+. def countTestCases(self):
+. cases = 0
+. for test in self._tests:
+. cases = cases + test.countTestCases()
+. return cases
+.
+. def addTest(self, test):
+. self._tests.append(test)
+.
+. def addTests(self, tests):
+. for test in tests:
+. self.addTest(test)
+.
+. def run(self, result):
+. return self(result)
+.
+. def __call__(self, result):
+. try: result.startSuite(self)
+. except AttributeError: pass
+.
+. for test in self._tests:
+. if result.shouldStop:
+. break
+. test(result)
+.
+. try: result.endSuite(self)
+. except AttributeError: pass
+.
+. return result
+.
+. def debug(self):
+. """Run the tests without collecting errors in a TestResult"""
+. for test in self._tests: test.debug()
+.
+.
+. class FunctionTestCase(TestCase):
+. """A test case that wraps a test function.
+.
+. This is useful for slipping pre-existing test functions into the
+. PyUnit framework. Optionally, set-up and tidy-up functions can be
+. supplied. As with TestCase, the tidy-up ('tearDown') function will
+. always be called if the set-up ('setUp') function ran successfully.
+. """
+.
+. def __init__(self, testFunc, setUp=None, tearDown=None,
+. description=None):
+. TestCase.__init__(self)
+. self._setUpFunc = setUp
+. self._tearDownFunc = tearDown
+. self._testFunc = testFunc
+. self._description = description
+.
+. def setUp(self):
+. if self._setUpFunc is not None:
+. self._setUpFunc()
+.
+. def tearDown(self):
+. if self._tearDownFunc is not None:
+. self._tearDownFunc()
+.
+. def runTest(self):
+. self._testFunc()
+.
+. def id(self):
+. return self._testFunc.__name__
+.
+. def __str__(self):
+. return "%s (%s)" % (_strclass(self.__class__), self._testFunc.__name__)
+.
+. def __repr__(self):
+. return "<%s testFunc=%s>" % (_strclass(self.__class__), self._testFunc)
+.
+. def shortDescription(self):
+. if self._description is not None: return self._description
+. doc = self._testFunc.__doc__
+. return doc and string.strip(string.split(doc, "\n")[0]) or None
+.
+.
+.
+. ##############################################################################
+. # Locating and loading tests
+. ##############################################################################
+.
+. class TestLoader:
+. """This class is responsible for loading tests according to various
+. criteria and returning them wrapped in a Test
+. """
+. testMethodPrefix = 'test'
+. sortTestMethodsUsing = cmp
+. suiteClass = TestSuite
+.
+. def loadTestsFromTestCase(self, testCaseClass):
+. """Return a suite of all tests cases contained in testCaseClass"""
+. name_list = self.getTestCaseNames(testCaseClass)
+. instance_list = map(testCaseClass, name_list)
+. description = getattr(testCaseClass, '__doc__') \
+. or testCaseClass.__name__
+. description = (description.splitlines()[0]).strip()
+. suite = self.suiteClass(instance_list, description)
+. return suite
+.
+. def loadTestsFromModule(self, module):
+. """Return a suite of all tests cases contained in the given module"""
+. tests = []
+. for name in dir(module):
+. obj = getattr(module, name)
+. if (isinstance(obj, (type, types.ClassType)) and
+. issubclass(obj, TestCase) and
+. not obj in [TestCase, FunctionTestCase]):
+. tests.append(self.loadTestsFromTestCase(obj))
+. description = getattr(module, '__doc__') \
+. or module.__name__
+. description = (description.splitlines()[0]).strip()
+. return self.suiteClass(tests, description)
+.
+. def loadTestsFromName(self, name, module=None):
+. """Return a suite of all tests cases given a string specifier.
+.
+. The name may resolve either to a module, a test case class, a
+. test method within a test case class, or a callable object which
+. returns a TestCase or TestSuite instance.
+.
+. The method optionally resolves the names relative to a given module.
+. """
+. parts = string.split(name, '.')
+. if module is None:
+. if not parts:
+. raise ValueError, "incomplete test name: %s" % name
+. else:
+. parts_copy = parts[:]
+. while parts_copy:
+. try:
+. module = __import__(string.join(parts_copy,'.'))
+. break
+. except ImportError:
+. del parts_copy[-1]
+. if not parts_copy: raise
+. parts = parts[1:]
+. obj = module
+. for part in parts:
+. obj = getattr(obj, part)
+.
+. import unittest
+. if type(obj) == types.ModuleType:
+. return self.loadTestsFromModule(obj)
+. elif (isinstance(obj, (type, types.ClassType)) and
+. issubclass(obj, unittest.TestCase)):
+. return self.loadTestsFromTestCase(obj)
+. elif type(obj) == types.UnboundMethodType:
+. return obj.im_class(obj.__name__)
+. elif callable(obj):
+. test = obj()
+. if not isinstance(test, unittest.TestCase) and \
+. not isinstance(test, unittest.TestSuite):
+. raise ValueError, \
+. "calling %s returned %s, not a test" % (obj,test)
+. return test
+. else:
+. raise ValueError, "don't know how to make test from: %s" % obj
+.
+. def loadTestsFromNames(self, names, module=None):
+. """Return a suite of all tests cases found using the given sequence
+. of string specifiers. See 'loadTestsFromName()'.
+. """
+. suites = []
+. for name in names:
+. suites.append(self.loadTestsFromName(name, module))
+. return self.suiteClass(suites)
+.
+. def getTestCaseNames(self, testCaseClass):
+. """Return a sorted sequence of method names found within testCaseClass
+. """
+. testFnNames = filter(lambda n,p=self.testMethodPrefix: n[:len(p)] == p,
+. dir(testCaseClass))
+. for baseclass in testCaseClass.__bases__:
+. for testFnName in self.getTestCaseNames(baseclass):
+. if testFnName not in testFnNames: # handle overridden methods
+. testFnNames.append(testFnName)
+. if self.sortTestMethodsUsing:
+. testFnNames.sort(self.sortTestMethodsUsing)
+. return testFnNames
+.
+.
+.
+. defaultTestLoader = TestLoader()
+.
+.
+. ##############################################################################
+. # Patches for old functions: these functions should be considered obsolete
+. ##############################################################################
+.
+. def _makeLoader(prefix, sortUsing, suiteClass=None):
+. loader = TestLoader()
+. loader.sortTestMethodsUsing = sortUsing
+. loader.testMethodPrefix = prefix
+. if suiteClass: loader.suiteClass = suiteClass
+. return loader
+.
+. def getTestCaseNames(testCaseClass, prefix, sortUsing=cmp):
+. return _makeLoader(prefix, sortUsing).getTestCaseNames(testCaseClass)
+.
+. def makeSuite(testCaseClass, prefix='test', sortUsing=cmp, suiteClass=TestSuite):
+. return _makeLoader(prefix, sortUsing, suiteClass).loadTestsFromTestCase(testCaseClass)
+.
+. def findTestCases(module, prefix='test', sortUsing=cmp, suiteClass=TestSuite):
+. return _makeLoader(prefix, sortUsing, suiteClass).loadTestsFromModule(module)
+.
+.
+. ##############################################################################
+. # Text UI
+. ##############################################################################
+.
+. class _WritelnDecorator:
+. """Used to decorate file-like objects with a handy 'writeln' method"""
+. def __init__(self,stream):
+. self.stream = stream
+.
+. def __getattr__(self, attr):
+. return getattr(self.stream,attr)
+.
+. def write(self, arg):
+. self.stream.write(arg)
+. self.stream.flush()
+.
+. def writeln(self, arg=None):
+. if arg: self.write(arg)
+. self.write('\n') # text-mode streams translate to \r\n if needed
+.
+.
+. class _TextTestResult(TestResult):
+. """A test result class that can print formatted text results to a stream.
+.
+. Used by TextTestRunner.
+. """
+. separator1 = '=' * 79
+. separator2 = '-' * 79
+.
+. def __init__(self, stream, descriptions, verbosity):
+. TestResult.__init__(self)
+. self.stream = stream
+. self.showAll = verbosity > 1
+. self.dots = verbosity == 1
+. self.descriptions = descriptions
+. if descriptions: self.indent = ' '
+. else: self.indent = ''
+. self.depth = 0
+. self.width = 80
+.
+. def getDescription(self, test):
+. if self.descriptions:
+. return test.shortDescription() or str(test)
+. else:
+. return str(test)
+.
+. def startSuite(self, suite):
+. if self.showAll and self.descriptions:
+. self.stream.write(self.indent * self.depth)
+. try: desc = self.getDescription(suite)
+. except AttributeError: desc = '(no description)'
+. self.stream.writeln(desc)
+. self.depth += 1
+.
+. def startTest(self, test):
+. TestResult.startTest(self, test)
+. if self.showAll:
+. self.stream.write(self.indent * self.depth)
+. d = self.getDescription(test)
+. dwidth = self.width - len(self.indent) * self.depth - 11
+. format = "%%-%is" % dwidth
+. self.stream.write(format % d)
+. self.stream.write(" ... ")
+.
+. def addSuccess(self, test):
+. TestResult.addSuccess(self, test)
+. if self.showAll:
+. self.stream.writeln("ok")
+. elif self.dots:
+. self.stream.write('.')
+.
+. def addError(self, test, err):
+. TestResult.addError(self, test, err)
+. if self.showAll:
+. self.stream.writeln("ERROR")
+. elif self.dots:
+. self.stream.write('E')
+.
+. def addFailure(self, test, err):
+. TestResult.addFailure(self, test, err)
+. if self.showAll:
+. self.stream.writeln("FAIL")
+. elif self.dots:
+. self.stream.write('F')
+.
+. def addSkip(self, test, err):
+. TestResult.addSkip(self, test, err)
+. if self.showAll:
+. self.stream.writeln("skip")
+. elif self.dots:
+. self.stream.write('s')
+.
+. def endSuite(self, suite):
+. self.depth -= 1
+.
+. def printErrors(self):
+. if self.dots or self.showAll:
+. self.stream.writeln()
+. self.printErrorList('ERROR', self.errors)
+. self.printErrorList('FAIL', self.failures)
+.
+. def printErrorList(self, flavour, errors):
+. for test, err in errors:
+. self.stream.writeln(self.separator1)
+. self.stream.writeln("%s: %s" % (flavour,self.getDescription(test)))
+. self.stream.writeln(self.separator2)
+. self.stream.writeln("%s" % err)
+.
+.
+. class TextTestRunner:
+. """A test runner class that displays results in textual form.
+.
+. It prints out the names of tests as they are run, errors as they
+. occur, and a summary of the results at the end of the test run.
+. """
+. def __init__(self, stream=sys.stderr, descriptions=1, verbosity=1):
+. self.stream = _WritelnDecorator(stream)
+. self.descriptions = descriptions
+. self.verbosity = verbosity
+.
+. def _makeResult(self):
+. return _TextTestResult(self.stream, self.descriptions, self.verbosity)
+.
+. def run(self, test):
+. "Run the given test case or test suite."
+. result = self._makeResult()
+. startTime = time.time()
+. test(result)
+. stopTime = time.time()
+. timeTaken = float(stopTime - startTime)
+. result.printErrors()
+. self.stream.writeln(result.separator2)
+. run = result.testsRun
+. self.stream.writeln("Ran %d test%s in %.3fs" %
+. (run, run != 1 and "s" or "", timeTaken))
+. self.stream.writeln()
+. if not result.wasSuccessful():
+. self.stream.write("FAILED (")
+. failed, errored, skipped = map(len, \
+. (result.failures, result.errors, result.skipped))
+. if failed:
+. self.stream.write("failures=%d" % failed)
+. if errored:
+. if failed: self.stream.write(", ")
+. self.stream.write("errors=%d" % errored)
+. if skipped:
+. self.stream.write(", skipped=%d" % skipped)
+. self.stream.writeln(")")
+. else:
+. if result.skipped:
+. self.stream.writeln("OK (skipped=%d)" % len(result.skipped))
+. else:
+. self.stream.writeln("OK")
+. return result
+.
+.
+.
+. ##############################################################################
+. # Facilities for running tests from the command line
+. ##############################################################################
+.
+. class TestProgram:
+. """A command-line program that runs a set of tests; this is primarily
+. for making test modules conveniently executable.
+. """
+. USAGE = """\
+. Usage: %(progName)s [options] [test] [...]
+.
+. Options:
+. -h, --help Show this message
+. -v, --verbose Verbose output
+. -q, --quiet Minimal output
+.
+. Examples:
+. %(progName)s - run default set of tests
+. %(progName)s MyTestSuite - run suite 'MyTestSuite'
+. %(progName)s MyTestCase.testSomething - run MyTestCase.testSomething
+. %(progName)s MyTestCase - run all 'test*' test methods
+. in MyTestCase
+. """
+. def __init__(self, module='__main__', defaultTest=None,
+. argv=None, testRunner=None, testLoader=defaultTestLoader):
+. if type(module) == type(''):
+. self.module = __import__(module)
+. for part in string.split(module,'.')[1:]:
+. self.module = getattr(self.module, part)
+. else:
+. self.module = module
+. if argv is None:
+. argv = sys.argv
+. self.verbosity = 1
+. self.defaultTest = defaultTest
+. self.testRunner = testRunner
+. self.testLoader = testLoader
+. self.progName = os.path.basename(argv[0])
+. self.parseArgs(argv)
+. self.runTests()
+.
+. def usageExit(self, msg=None):
+. if msg: print msg
+. print self.USAGE % self.__dict__
+. sys.exit(2)
+.
+. def parseArgs(self, argv):
+. import getopt
+. try:
+. options, args = getopt.getopt(argv[1:], 'hHvq',
+. ['help','verbose','quiet'])
+. for opt, value in options:
+. if opt in ('-h','-H','--help'):
+. self.usageExit()
+. if opt in ('-q','--quiet'):
+. self.verbosity = 0
+. if opt in ('-v','--verbose'):
+. self.verbosity = 2
+. if len(args) == 0 and self.defaultTest is None:
+. self.test = self.testLoader.loadTestsFromModule(self.module)
+. return
+. if len(args) > 0:
+. self.testNames = args
+. else:
+. self.testNames = (self.defaultTest,)
+. self.createTests()
+. except getopt.error, msg:
+. self.usageExit(msg)
+.
+. def createTests(self):
+. self.test = self.testLoader.loadTestsFromNames(self.testNames,
+. self.module)
+.
+. def runTests(self):
+. if self.testRunner is None:
+. self.testRunner = TextTestRunner(verbosity=self.verbosity)
+. result = self.testRunner.run(self.test)
+. sys.exit(not result.wasSuccessful())
+.
+. main = TestProgram
+.
+.
+. ##############################################################################
+. # Executing this module from the command line
+. ##############################################################################
+.
+. if __name__ == "__main__":
+. main(module=None)
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/f2/runtests.py-20051228065045-34ad1617fed73462.weave
+++ urlgrabber-2.9.7/.bzr/weaves/f2/runtests.py-20051228065045-34ad1617fed73462.weave
@@ -0,0 +1,70 @@
+# bzr weave file v5
+i
+1 0aee74f73f3b4e0b0469b92a69b8386d04be4afb
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+{ 0
+. #!/usr/bin/python
+.
+. """Usage: python runtests.py [OPTIONS]
+. Quick script to run all unit tests from source directory
+. (e.g. without having to install.)
+.
+. OPTIONS:
+.
+. -d, --descriptions=NUM Set to 0 to turn off printing
+. test doc strings as descriptions.
+. -v, --verbosity=NUM Output verbosity level. Defaults to
+. 2 which is one line of info per test. Set
+. to 1 to get one char of info per test
+. or 0 to disable status output completely.
+. """
+.
+. # $Id: runtests.py,v 1.7 2004/03/31 17:02:00 mstenner Exp $
+.
+. import sys
+. from os.path import dirname, join as joinpath
+. from getopt import getopt
+. from base_test_code import *
+.
+. def main():
+. # setup sys.path so that we can run this from the source
+. # directory.
+. (descriptions, verbosity) = parse_args()
+. dn = dirname(sys.argv[0])
+. sys.path.insert(0, joinpath(dn,'..'))
+. sys.path.insert(0, dn)
+. # it's okay to import now that sys.path is setup.
+. import test_grabber, test_byterange, test_mirror, test_keepalive
+. suite = TestSuite( (test_grabber.suite(),
+. test_byterange.suite(),
+. test_mirror.suite(),
+. test_keepalive.suite()) )
+. suite.description = 'urlgrabber tests'
+. runner = TextTestRunner(stream=sys.stdout,
+. descriptions=descriptions,
+. verbosity=verbosity)
+. runner.run(suite)
+.
+. def parse_args():
+. descriptions = 1
+. verbosity = 2
+. opts, args = getopt(sys.argv[1:],'hd:v:',['descriptions=','help','verbosity='])
+. for o,a in opts:
+. if o in ('-h', '--help'):
+. usage()
+. sys.exit(0)
+. elif o in ('-d', '--descriptions'):
+. descriptions = int(a)
+. elif o in ('-v', '--verbosity'):
+. verbosity = int(a)
+. return (descriptions,verbosity)
+.
+. def usage():
+. print __doc__
+.
+. if __name__ == '__main__':
+. main()
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/e0/test_byterange.py-20051228065045-cf8f115fc51ca9c9.weave
+++ urlgrabber-2.9.7/.bzr/weaves/e0/test_byterange.py-20051228065045-cf8f115fc51ca9c9.weave
@@ -0,0 +1,171 @@
+# bzr weave file v5
+i
+1 0f9c067fd051d0b5fd8d521b85458adc63f8bf50
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+{ 0
+. #!/usr/bin/python -t
+.
+. # This library is free software; you can redistribute it and/or
+. # modify it under the terms of the GNU Lesser General Public
+. # License as published by the Free Software Foundation; either
+. # version 2.1 of the License, or (at your option) any later version.
+. #
+. # This library is distributed in the hope that it will be useful,
+. # but WITHOUT ANY WARRANTY; without even the implied warranty of
+. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+. # Lesser General Public License for more details.
+. #
+. # You should have received a copy of the GNU Lesser General Public
+. # License along with this library; if not, write to the
+. # Free Software Foundation, Inc.,
+. # 59 Temple Place, Suite 330,
+. # Boston, MA 02111-1307 USA
+.
+. # This file is part of urlgrabber, a high-level cross-protocol url-grabber
+. # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+.
+. """byterange.py tests"""
+.
+. # $Id: test_byterange.py,v 1.6 2004/03/31 17:02:00 mstenner Exp $
+.
+. import sys
+.
+. from StringIO import StringIO
+. from urlgrabber.byterange import RangeableFileObject
+.
+. from base_test_code import *
+.
+. class RangeableFileObjectTestCase(TestCase):
+. """Test range.RangeableFileObject class"""
+.
+. def setUp(self):
+. # 0 1 2 3 4 5 6 7 8 9
+. # 0123456789012345678901234567890123456789012345678901234567 890123456789012345678901234567890
+. self.test = 'Why cannot we write the entire 24 volumes of Encyclopaedia\nBrittanica on the head of a pin?\n'
+. self.fo = StringIO(self.test)
+. self.rfo = RangeableFileObject(self.fo, (20,69))
+.
+. def tearDown(self):
+. pass
+.
+. def test_seek(self):
+. """RangeableFileObject.seek()"""
+. self.rfo.seek(11)
+. self.assertEquals('24', self.rfo.read(2))
+. self.rfo.seek(14)
+. self.assertEquals('volumes', self.rfo.read(7))
+. self.rfo.seek(1,1)
+. self.assertEquals('of', self.rfo.read(2))
+.
+. def test_poor_mans_seek(self):
+. """RangeableFileObject.seek() poor mans version..
+.
+. We just delete the seek method from StringIO so we can
+. excercise RangeableFileObject when the file object supplied
+. doesn't support seek.
+. """
+. seek = StringIO.seek
+. del(StringIO.seek)
+. self.test_seek()
+. StringIO.seek = seek
+.
+. def test_read(self):
+. """RangeableFileObject.read()"""
+. self.assertEquals('the', self.rfo.read(3))
+. self.assertEquals(' entire 24 volumes of ', self.rfo.read(22))
+. self.assertEquals('Encyclopaedia\nBrittanica', self.rfo.read(50))
+. self.assertEquals('', self.rfo.read())
+.
+. def test_readall(self):
+. """RangeableFileObject.read(): to end of file."""
+. rfo = RangeableFileObject(StringIO(self.test),(11,))
+. self.assertEquals(self.test[11:],rfo.read())
+.
+. def test_readline(self):
+. """RangeableFileObject.readline()"""
+. self.assertEquals('the entire 24 volumes of Encyclopaedia\n', self.rfo.readline())
+. self.assertEquals('Brittanica', self.rfo.readline())
+. self.assertEquals('', self.rfo.readline())
+.
+. def test_tell(self):
+. """RangeableFileObject.tell()"""
+. self.assertEquals(0,self.rfo.tell())
+. self.rfo.read(5)
+. self.assertEquals(5,self.rfo.tell())
+. self.rfo.readline()
+. self.assertEquals(39,self.rfo.tell())
+.
+. class RangeModuleTestCase(TestCase):
+. """Test module level functions defined in range.py"""
+. def setUp(self):
+. pass
+.
+. def tearDown(self):
+. pass
+.
+. def test_range_tuple_normalize(self):
+. """byterange.range_tuple_normalize()"""
+. from urlgrabber.byterange import range_tuple_normalize
+. from urlgrabber.byterange import RangeError
+. tests = (
+. ((None,50), (0,50)),
+. ((500,600), (500,600)),
+. ((500,), (500,'')),
+. ((500,None), (500,'')),
+. (('',''), None),
+. ((0,), None),
+. (None, None)
+. )
+. for test, ex in tests:
+. self.assertEquals( range_tuple_normalize(test), ex )
+.
+. try: range_tuple_normalize( (10,8) )
+. except RangeError: pass
+. else: self.fail("range_tuple_normalize( (10,8) ) should have raised RangeError")
+.
+. def test_range_header_to_tuple(self):
+. """byterange.range_header_to_tuple()"""
+. from urlgrabber.byterange import range_header_to_tuple
+. tests = (
+. ('bytes=500-600', (500,601)),
+. ('bytes=500-', (500,'')),
+. ('bla bla', ()),
+. (None, None)
+. )
+. for test, ex in tests:
+. self.assertEquals( range_header_to_tuple(test), ex )
+.
+. def test_range_tuple_to_header(self):
+. """byterange.range_tuple_to_header()"""
+. from urlgrabber.byterange import range_tuple_to_header
+. tests = (
+. ((500,600), 'bytes=500-599'),
+. ((500,''), 'bytes=500-'),
+. ((500,), 'bytes=500-'),
+. ((None,500), 'bytes=0-499'),
+. (('',500), 'bytes=0-499'),
+. (None, None),
+. )
+. for test, ex in tests:
+. self.assertEquals( range_tuple_to_header(test), ex )
+.
+. try: range_tuple_to_header( ('not an int',500) )
+. except ValueError: pass
+. else: self.fail("range_tuple_to_header( ('not an int',500) ) should have raised ValueError")
+.
+. try: range_tuple_to_header( (0,'not an int') )
+. except ValueError: pass
+. else: self.fail("range_tuple_to_header( (0, 'not an int') ) should have raised ValueError")
+.
+. def suite():
+. tl = TestLoader()
+. return tl.loadTestsFromModule(sys.modules[__name__])
+.
+. if __name__ == '__main__':
+. runner = TextTestRunner(stream=sys.stdout,descriptions=1,verbosity=2)
+. runner.run(suite())
+.
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/cb/test_grabber.py-20051228065045-53c2781dca9e99bf.weave
+++ urlgrabber-2.9.7/.bzr/weaves/cb/test_grabber.py-20051228065045-53c2781dca9e99bf.weave
@@ -0,0 +1,586 @@
+# bzr weave file v5
+i
+1 698bd7270f130d2afbf308a2321248445c6449e7
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+i 0
+1 87488c22f6fec2223f0a5a41a42f52d95a4cdca6
+n wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
+
+w
+{ 0
+. #!/usr/bin/python -t
+.
+. # This library is free software; you can redistribute it and/or
+. # modify it under the terms of the GNU Lesser General Public
+. # License as published by the Free Software Foundation; either
+. # version 2.1 of the License, or (at your option) any later version.
+. #
+. # This library is distributed in the hope that it will be useful,
+. # but WITHOUT ANY WARRANTY; without even the implied warranty of
+. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+. # Lesser General Public License for more details.
+. #
+. # You should have received a copy of the GNU Lesser General Public
+. # License along with this library; if not, write to the
+. # Free Software Foundation, Inc.,
+. # 59 Temple Place, Suite 330,
+. # Boston, MA 02111-1307 USA
+.
+. # This file is part of urlgrabber, a high-level cross-protocol url-grabber
+. # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+.
+. """grabber.py tests"""
+.
+[ 1
+. # $Id: test_grabber.py,v 1.24 2005/02/25 00:00:36 mstenner Exp $
+] 1
+{ 1
+. # $Id: test_grabber.py,v 1.28 2005/10/22 21:57:27 mstenner Exp $
+}
+.
+. import sys
+. import os
+. import string, tempfile, random, cStringIO, os
+. import urllib2
+. import socket
+.
+. from base_test_code import *
+.
+. import urlgrabber
+. import urlgrabber.grabber as grabber
+. from urlgrabber.grabber import URLGrabber, URLGrabError, CallbackObject
+. from urlgrabber.progress import text_progress_meter
+.
+. class FileObjectTests(TestCase):
+.
+. def setUp(self):
+. self.filename = tempfile.mktemp()
+. fo = open(self.filename, 'w')
+. fo.write(reference_data)
+. fo.close()
+.
+. self.fo_input = cStringIO.StringIO(reference_data)
+. self.fo_output = cStringIO.StringIO()
+. self.wrapper = grabber.URLGrabberFileObject('file://' + self.filename, self.fo_output,
+. grabber.default_grabber.opts)
+.
+. def tearDown(self):
+. os.unlink(self.filename)
+.
+. def test_readall(self):
+. "URLGrabberFileObject .read() method"
+. s = self.wrapper.read()
+. self.fo_output.write(s)
+. self.assertEqual(reference_data, self.fo_output.getvalue())
+.
+. def test_readline(self):
+. "URLGrabberFileObject .readline() method"
+. while 1:
+. s = self.wrapper.readline()
+. self.fo_output.write(s)
+. if not s: break
+. self.assertEqual(reference_data, self.fo_output.getvalue())
+.
+. def test_readlines(self):
+. "URLGrabberFileObject .readlines() method"
+. li = self.wrapper.readlines()
+. self.fo_output.write(string.join(li, ''))
+. self.assertEqual(reference_data, self.fo_output.getvalue())
+.
+. def test_smallread(self):
+. "URLGrabberFileObject .read(N) with small N"
+. while 1:
+. s = self.wrapper.read(23)
+. self.fo_output.write(s)
+. if not s: break
+. self.assertEqual(reference_data, self.fo_output.getvalue())
+.
+. class HTTPTests(TestCase):
+. def test_reference_file(self):
+. "download refernce file via HTTP"
+. filename = tempfile.mktemp()
+. grabber.urlgrab(ref_http, filename)
+.
+. fo = open(filename)
+. contents = fo.read()
+. fo.close()
+.
+. self.assertEqual(contents, reference_data)
+{ 1
+.
+. def test_post(self):
+. "do an HTTP post"
+. headers = (('Content-type', 'text/plain'),)
+. ret = grabber.urlread(base_http + 'test_post.php',
+. data=short_reference_data,
+. http_headers=headers)
+.
+. self.assertEqual(ret, short_reference_data)
+}
+.
+. class URLGrabberModuleTestCase(TestCase):
+. """Test module level functions defined in grabber.py"""
+. def setUp(self):
+. pass
+.
+. def tearDown(self):
+. pass
+.
+. def test_urlopen(self):
+. "module-level urlopen() function"
+. fo = urlgrabber.urlopen('http://www.python.org')
+. fo.close()
+.
+. def test_urlgrab(self):
+. "module-level urlgrab() function"
+. outfile = tempfile.mktemp()
+. filename = urlgrabber.urlgrab('http://www.python.org',
+. filename=outfile)
+. os.unlink(outfile)
+.
+. def test_urlread(self):
+. "module-level urlread() function"
+. s = urlgrabber.urlread('http://www.python.org')
+.
+.
+. class URLGrabberTestCase(TestCase):
+. """Test grabber.URLGrabber class"""
+.
+. def setUp(self):
+. self.meter = text_progress_meter( fo=open('/dev/null', 'w') )
+. pass
+.
+. def tearDown(self):
+. pass
+.
+. def testKeywordArgs(self):
+. """grabber.URLGrabber.__init__() **kwargs handling.
+.
+. This is a simple test that just passes some arbitrary
+. values into the URLGrabber constructor and checks that
+. they've been set properly.
+. """
+. opener = urllib2.OpenerDirector()
+. g = URLGrabber( progress_obj=self.meter,
+. throttle=0.9,
+. bandwidth=20,
+. retry=20,
+. retrycodes=[5,6,7],
+. copy_local=1,
+. close_connection=1,
+. user_agent='test ua/1.0',
+. proxies={'http' : 'http://www.proxy.com:9090'},
+. opener=opener )
+. opts = g.opts
+. self.assertEquals( opts.progress_obj, self.meter )
+. self.assertEquals( opts.throttle, 0.9 )
+. self.assertEquals( opts.bandwidth, 20 )
+. self.assertEquals( opts.retry, 20 )
+. self.assertEquals( opts.retrycodes, [5,6,7] )
+. self.assertEquals( opts.copy_local, 1 )
+. self.assertEquals( opts.close_connection, 1 )
+. self.assertEquals( opts.user_agent, 'test ua/1.0' )
+. self.assertEquals( opts.proxies, {'http' : 'http://www.proxy.com:9090'} )
+. self.assertEquals( opts.opener, opener )
+.
+. nopts = grabber.URLGrabberOptions(delegate=opts, throttle=0.5,
+. copy_local=0)
+. self.assertEquals( nopts.progress_obj, self.meter )
+. self.assertEquals( nopts.throttle, 0.5 )
+. self.assertEquals( nopts.bandwidth, 20 )
+. self.assertEquals( nopts.retry, 20 )
+. self.assertEquals( nopts.retrycodes, [5,6,7] )
+. self.assertEquals( nopts.copy_local, 0 )
+. self.assertEquals( nopts.close_connection, 1 )
+. self.assertEquals( nopts.user_agent, 'test ua/1.0' )
+. self.assertEquals( nopts.proxies, {'http' : 'http://www.proxy.com:9090'} )
+. nopts.opener = None
+. self.assertEquals( nopts.opener, None )
+.
+. def test_parse_url(self):
+. """grabber.URLGrabber._parse_url()"""
+. g = URLGrabber()
+. (url, parts) = g._parse_url('http://user:pass@host.com/path/part/basename.ext?arg1=val1&arg2=val2#hash')
+. (scheme, host, path, parm, query, frag) = parts
+. self.assertEquals('http://host.com/path/part/basename.ext?arg1=val1&arg2=val2#hash',url)
+. self.assertEquals('http', scheme)
+. self.assertEquals('host.com', host)
+. self.assertEquals('/path/part/basename.ext', path)
+. self.assertEquals('arg1=val1&arg2=val2', query)
+. self.assertEquals('hash', frag)
+.
+. def test_parse_url_local_filename(self):
+. """grabber.URLGrabber._parse_url('/local/file/path') """
+. g = URLGrabber()
+. (url, parts) = g._parse_url('/etc/redhat-release')
+. (scheme, host, path, parm, query, frag) = parts
+. self.assertEquals('file:///etc/redhat-release',url)
+. self.assertEquals('file', scheme)
+. self.assertEquals('', host)
+. self.assertEquals('/etc/redhat-release', path)
+. self.assertEquals('', query)
+. self.assertEquals('', frag)
+.
+. def test_parse_url_with_prefix(self):
+. """grabber.URLGrabber._parse_url() with .prefix"""
+. base = 'http://foo.com/dir'
+. bases = [base, base+'/']
+. file = 'bar/baz'
+. target = base + '/' + file
+.
+. for b in bases:
+. g = URLGrabber(prefix=b)
+. (url, parts) = g._parse_url(file)
+. self.assertEquals(url, target)
+.
+. def test_make_callback(self):
+. """grabber.URLGrabber._make_callback() tests"""
+. def cb(e): pass
+. tup_cb = (cb, ('stuff'), {'some': 'dict'})
+. g = URLGrabber()
+. self.assertEquals(g._make_callback(cb), (cb, (), {}))
+. self.assertEquals(g._make_callback(tup_cb), tup_cb)
+.
+. class FailureTestCase(TestCase):
+. """Test failure behavior"""
+.
+. def _failure_callback(self, obj, *args, **kwargs):
+. self.failure_callback_called = 1
+. self.obj = obj
+. self.args = args
+. self.kwargs = kwargs
+.
+. def test_failure_callback_called(self):
+. "failure callback is called on retry"
+. self.failure_callback_called = 0
+[ 1
+. g = grabber.URLGrabber(retry=2,failure_callback=self._failure_callback)
+] 1
+{ 1
+. g = grabber.URLGrabber(retry=2, retrycodes=[14],
+. failure_callback=self._failure_callback)
+}
+. try: g.urlgrab(ref_404)
+. except URLGrabError: pass
+. self.assertEquals(self.failure_callback_called, 1)
+.
+. def test_failure_callback_args(self):
+. "failure callback is called with the proper args"
+. fc = (self._failure_callback, ('foo',), {'bar': 'baz'})
+[ 1
+. g = grabber.URLGrabber(retry=2,failure_callback=fc)
+] 1
+{ 1
+. g = grabber.URLGrabber(retry=2, retrycodes=[14],
+. failure_callback=fc)
+}
+. try: g.urlgrab(ref_404)
+. except URLGrabError: pass
+. self.assert_(hasattr(self, 'obj'))
+. self.assert_(hasattr(self, 'args'))
+. self.assert_(hasattr(self, 'kwargs'))
+. self.assertEquals(self.args, ('foo',))
+. self.assertEquals(self.kwargs, {'bar': 'baz'})
+. self.assert_(isinstance(self.obj, CallbackObject))
+. self.assertEquals(self.obj.url, ref_404)
+. self.assert_(isinstance(self.obj.exception, URLGrabError))
+. del self.obj
+.
+{ 1
+. class InterruptTestCase(TestCase):
+. """Test interrupt callback behavior"""
+.
+. class InterruptProgress:
+. def start(self, *args, **kwargs): pass
+. def update(self, *args, **kwargs): raise KeyboardInterrupt
+. def end(self, *args, **kwargs): pass
+.
+. class TestException(Exception): pass
+.
+. def _interrupt_callback(self, obj, *args, **kwargs):
+. self.interrupt_callback_called = 1
+. self.obj = obj
+. self.args = args
+. self.kwargs = kwargs
+. if kwargs.get('exception', None):
+. raise kwargs['exception']
+.
+. def test_interrupt_callback_called(self):
+. "interrupt callback is called on retry"
+. self.interrupt_callback_called = 0
+. ic = (self._interrupt_callback, (), {})
+. g = grabber.URLGrabber(progress_obj=self.InterruptProgress(),
+. interrupt_callback=ic)
+. try: g.urlgrab(ref_http)
+. except KeyboardInterrupt: pass
+. self.assertEquals(self.interrupt_callback_called, 1)
+.
+. def test_interrupt_callback_raises(self):
+. "interrupt callback raises an exception"
+. ic = (self._interrupt_callback, (),
+. {'exception': self.TestException()})
+. g = grabber.URLGrabber(progress_obj=self.InterruptProgress(),
+. interrupt_callback=ic)
+. self.assertRaises(self.TestException, g.urlgrab, ref_http)
+.
+}
+. class CheckfuncTestCase(TestCase):
+. """Test checkfunc behavior"""
+.
+. def setUp(self):
+. cf = (self._checkfunc, ('foo',), {'bar': 'baz'})
+. self.g = grabber.URLGrabber(checkfunc=cf)
+. self.filename = tempfile.mktemp()
+. self.data = short_reference_data
+.
+. def tearDown(self):
+. try: os.unlink(self.filename)
+. except: pass
+. if hasattr(self, 'obj'): del self.obj
+.
+. def _checkfunc(self, obj, *args, **kwargs):
+. self.obj = obj
+. self.args = args
+. self.kwargs = kwargs
+.
+. if hasattr(obj, 'filename'):
+. # we used urlgrab
+. fo = file(obj.filename)
+. data = fo.read()
+. fo.close()
+. else:
+. # we used urlread
+. data = obj.data
+.
+. if data == self.data: return
+. else: raise URLGrabError(-2, "data doesn't match")
+.
+. def _check_common_args(self):
+. "check the args that are common to both urlgrab and urlread"
+. self.assert_(hasattr(self, 'obj'))
+. self.assert_(hasattr(self, 'args'))
+. self.assert_(hasattr(self, 'kwargs'))
+. self.assertEquals(self.args, ('foo',))
+. self.assertEquals(self.kwargs, {'bar': 'baz'})
+. self.assert_(isinstance(self.obj, CallbackObject))
+. self.assertEquals(self.obj.url, short_ref_http)
+.
+. def test_checkfunc_urlgrab_args(self):
+. "check for proper args when used with urlgrab"
+. self.g.urlgrab(short_ref_http, self.filename)
+. self._check_common_args()
+. self.assertEquals(self.obj.filename, self.filename)
+.
+. def test_checkfunc_urlread_args(self):
+. "check for proper args when used with urlread"
+. self.g.urlread(short_ref_http)
+. self._check_common_args()
+. self.assertEquals(self.obj.data, short_reference_data)
+.
+. def test_checkfunc_urlgrab_success(self):
+. "check success with urlgrab checkfunc"
+. self.data = short_reference_data
+. self.g.urlgrab(short_ref_http, self.filename)
+.
+. def test_checkfunc_urlread_success(self):
+. "check success with urlread checkfunc"
+. self.data = short_reference_data
+. self.g.urlread(short_ref_http)
+.
+. def test_checkfunc_urlgrab_failure(self):
+. "check failure with urlgrab checkfunc"
+. self.data = 'other data'
+. self.assertRaises(URLGrabError, self.g.urlgrab,
+. short_ref_http, self.filename)
+.
+. def test_checkfunc_urlread_failure(self):
+. "check failure with urlread checkfunc"
+. self.data = 'other data'
+. self.assertRaises(URLGrabError, self.g.urlread,
+. short_ref_http)
+.
+. class RegetTestBase:
+. def setUp(self):
+. self.ref = short_reference_data
+. self.grabber = grabber.URLGrabber(reget='check_timestamp')
+. self.filename = tempfile.mktemp()
+. self.hl = len(self.ref) / 2
+. self.url = 'OVERRIDE THIS'
+.
+. def tearDown(self):
+. try: os.unlink(self.filename)
+. except: pass
+.
+. def _make_half_zero_file(self):
+. fo = open(self.filename, 'w')
+. fo.write('0'*self.hl)
+. fo.close()
+.
+. def _read_file(self):
+. fo = open(self.filename, 'r')
+. data = fo.read()
+. fo.close()
+. return data
+.
+. class CommonRegetTests(RegetTestBase, TestCase):
+. def test_bad_reget_type(self):
+. "exception raised for illegal reget mode"
+. self.assertRaises(URLGrabError, self.grabber.urlgrab,
+. self.url, self.filename, reget='junk')
+.
+. class FTPRegetTests(RegetTestBase, TestCase):
+. def setUp(self):
+. RegetTestBase.setUp(self)
+. self.url = short_ref_ftp
+. # this tests to see if the server is available. If it's not,
+. # then these tests will be skipped
+. try:
+. fo = urllib2.urlopen(self.url).close()
+. except IOError:
+. self.skip()
+.
+. def test_basic_reget(self):
+. 'simple (forced) reget'
+. self._make_half_zero_file()
+. self.grabber.urlgrab(self.url, self.filename, reget='simple')
+. data = self._read_file()
+.
+. self.assertEquals(data[:self.hl], '0'*self.hl)
+. self.assertEquals(data[self.hl:], self.ref[self.hl:])
+.
+. class HTTPRegetTests(FTPRegetTests):
+. def setUp(self):
+. RegetTestBase.setUp(self)
+. self.url = short_ref_http
+.
+. def test_older_check_timestamp(self):
+. # define this here rather than in the FTP tests because currently,
+. # we get no timestamp information back from ftp servers.
+. self._make_half_zero_file()
+. ts = 1600000000 # set local timestamp to 2020
+. os.utime(self.filename, (ts, ts))
+. self.grabber.urlgrab(self.url, self.filename, reget='check_timestamp')
+. data = self._read_file()
+.
+. self.assertEquals(data[:self.hl], '0'*self.hl)
+. self.assertEquals(data[self.hl:], self.ref[self.hl:])
+.
+. def test_newer_check_timestamp(self):
+. # define this here rather than in the FTP tests because currently,
+. # we get no timestamp information back from ftp servers.
+. self._make_half_zero_file()
+. ts = 1 # set local timestamp to 1969
+. os.utime(self.filename, (ts, ts))
+. self.grabber.urlgrab(self.url, self.filename, reget='check_timestamp')
+. data = self._read_file()
+.
+. self.assertEquals(data, self.ref)
+.
+. class FileRegetTests(HTTPRegetTests):
+. def setUp(self):
+. self.ref = short_reference_data
+. tmp = tempfile.mktemp()
+. tmpfo = open(tmp, 'w')
+. tmpfo.write(self.ref)
+. tmpfo.close()
+. self.tmp = tmp
+.
+. self.url = 'file://' + tmp
+.
+. self.grabber = grabber.URLGrabber(reget='check_timestamp',
+. copy_local=1)
+. self.filename = tempfile.mktemp()
+. self.hl = len(self.ref) / 2
+.
+. def tearDown(self):
+. try: os.unlink(self.filename)
+. except: pass
+. try: os.unlink(self.tmp)
+. except: pass
+.
+. class ProFTPDSucksTests(TestCase):
+. def setUp(self):
+. self.url = ref_proftp
+. try:
+. fo = urllib2.urlopen(self.url).close()
+. except IOError:
+. self.skip()
+.
+. def test_restart_workaround(self):
+. inst = grabber.URLGrabber()
+. rslt = inst.urlread(self.url, range=(500, 1000))
+.
+. class BaseProxyTests(TestCase):
+. good_p = '%s://%s:%s@%s:%i' % (proxy_proto, proxy_user,
+. good_proxy_pass, proxy_host, proxy_port)
+. bad_p = '%s://%s:%s@%s:%i' % (proxy_proto, proxy_user,
+. bad_proxy_pass, proxy_host, proxy_port)
+. good_proxies = {'ftp': good_p, 'http': good_p}
+. bad_proxies = {'ftp': bad_p, 'http': bad_p}
+.
+. def have_proxy(self):
+. have_proxy = 1
+. s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+. try:
+. s.connect((proxy_host, proxy_port))
+. s.close()
+. except socket.error:
+. have_proxy = 0
+. return have_proxy
+.
+{ 1
+. class ProxyFormatTests(BaseProxyTests):
+. def setUp(self):
+. grabber._proxy_cache = []
+.
+. def tearDown(self):
+. grabber._proxy_cache = []
+.
+. def test_good_proxy_formats(self):
+. for f in ['http://foo.com/', 'http://user:pass@foo.com:8888']:
+. h = grabber.CachedProxyHandler({'http': f})
+.
+. def test_bad_proxy_formats(self):
+. for f in ['foo.com', 'foo.com:8888', 'user:pass@foo.com:8888']:
+. self.assertRaises(URLGrabError, grabber.CachedProxyHandler,
+. {'http': f})
+.
+.
+}
+. class ProxyHTTPAuthTests(BaseProxyTests):
+. def setUp(self):
+. self.url = ref_http
+. if not self.have_proxy():
+. self.skip()
+. self.g = URLGrabber()
+.
+. def test_good_password(self):
+. self.g.urlopen(self.url, proxies=self.good_proxies)
+.
+. def test_bad_password(self):
+. self.assertRaises(URLGrabError, self.g.urlopen,
+. self.url, proxies=self.bad_proxies)
+.
+. class ProxyFTPAuthTests(ProxyHTTPAuthTests):
+. def setUp(self):
+. self.url = ref_ftp
+. if not self.have_proxy():
+. self.skip()
+. try:
+. fo = urllib2.urlopen(self.url).close()
+. except IOError:
+. self.skip()
+. self.g = URLGrabber()
+.
+. def suite():
+. tl = TestLoader()
+. return tl.loadTestsFromModule(sys.modules[__name__])
+.
+. if __name__ == '__main__':
+. grabber.DEBUG = 0
+. runner = TextTestRunner(stream=sys.stdout,descriptions=1,verbosity=2)
+. runner.run(suite())
+.
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/a8/test_keepalive.py-20051228065045-41d3009733f91016.weave
+++ urlgrabber-2.9.7/.bzr/weaves/a8/test_keepalive.py-20051228065045-41d3009733f91016.weave
@@ -0,0 +1,341 @@
+# bzr weave file v5
+i
+1 1e88a6b473072070e5949cc9e1b9472067a6bc14
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+i 0
+1 40049b943cb20fbc63db873ebd67f9a3b2b8a196
+n wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
+
+w
+{ 0
+. #!/usr/bin/python -t
+.
+. # This library is free software; you can redistribute it and/or
+. # modify it under the terms of the GNU Lesser General Public
+. # License as published by the Free Software Foundation; either
+. # version 2.1 of the License, or (at your option) any later version.
+. #
+. # This library is distributed in the hope that it will be useful,
+. # but WITHOUT ANY WARRANTY; without even the implied warranty of
+. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+. # Lesser General Public License for more details.
+. #
+. # You should have received a copy of the GNU Lesser General Public
+. # License along with this library; if not, write to the
+. # Free Software Foundation, Inc.,
+. # 59 Temple Place, Suite 330,
+. # Boston, MA 02111-1307 USA
+.
+. # This file is part of urlgrabber, a high-level cross-protocol url-grabber
+. # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+.
+. """keepalive.py tests"""
+.
+[ 1
+. # $Id: test_keepalive.py,v 1.9 2005/02/14 21:55:06 mstenner Exp $
+] 1
+{ 1
+. # $Id: test_keepalive.py,v 1.11 2005/10/22 21:57:27 mstenner Exp $
+}
+.
+. import sys
+. import os
+. import time
+. import urllib2
+. import threading
+. import re
+.
+. from urllib2 import URLError, HTTPError
+.
+. from base_test_code import *
+.
+. from urlgrabber import keepalive
+.
+{ 1
+. class FakeLogger:
+. def __init__(self):
+. self.logs = []
+. def debug(self, msg, *args):
+. self.logs.append(msg % args)
+. warn = warning = info = error = debug
+.
+}
+. class CorruptionTests(TestCase):
+. def setUp(self):
+. self.kh = keepalive.HTTPHandler()
+. self.opener = urllib2.build_opener(self.kh)
+. self.ref = ref_http
+. self.fo = self.opener.open(self.ref)
+.
+. def tearDown(self):
+. self.fo.close()
+. self.kh.close_all()
+.
+. def test_readall(self):
+. "download a file with a single call to read()"
+. data = self.fo.read()
+. self.assert_(data == reference_data)
+.
+. def test_readline(self):
+. "download a file with multiple calls to readline()"
+. data = ''
+. while 1:
+. s = self.fo.readline()
+. if s: data = data + s
+. else: break
+. self.assert_(data == reference_data)
+.
+. def test_readlines(self):
+. "download a file with a single call to readlines()"
+. lines = self.fo.readlines()
+. data = ''.join(lines)
+. self.assert_(data == reference_data)
+.
+. def test_smallread(self):
+. "download a file with multiple calls to read(23)"
+. data = ''
+. while 1:
+. s = self.fo.read(23)
+. if s: data = data + s
+. else: break
+. self.assert_(data == reference_data)
+.
+. def test_mixed_read(self):
+. "download a file with mixed readline() and read(23) calls"
+. data = ''
+. while 1:
+. s = self.fo.read(23)
+. if s: data = data + s
+. else: break
+. s = self.fo.readline()
+. if s: data = data + s
+. else: break
+. self.assert_(data == reference_data)
+.
+. class HTTPErrorTests(TestCase):
+. def setUp(self):
+. self.kh = keepalive.HTTPHandler()
+. self.opener = urllib2.build_opener(self.kh)
+. import sys
+[ 1
+. self.python_version = map(int, sys.version.split()[0].split('.'))
+] 1
+{ 1
+. self.python_version = sys.version_info
+}
+.
+. def tearDown(self):
+. self.kh.close_all()
+. keepalive.HANDLE_ERRORS = 1
+.
+. def test_200_handler_on(self):
+. "test that 200 works with fancy handler"
+. keepalive.HANDLE_ERRORS = 1
+. fo = self.opener.open(ref_http)
+. data = fo.read()
+. fo.close()
+. self.assertEqual((fo.status, fo.reason), (200, 'OK'))
+.
+. def test_200_handler_off(self):
+. "test that 200 works without fancy handler"
+. keepalive.HANDLE_ERRORS = 0
+. fo = self.opener.open(ref_http)
+. data = fo.read()
+. fo.close()
+. self.assertEqual((fo.status, fo.reason), (200, 'OK'))
+.
+. def test_404_handler_on(self):
+. "test that 404 works with fancy handler"
+. keepalive.HANDLE_ERRORS = 1
+. self.assertRaises(URLError, self.opener.open, ref_404)
+.
+. def test_404_handler_off(self):
+. "test that 404 works without fancy handler"
+. keepalive.HANDLE_ERRORS = 0
+. ## see the HANDLE_ERRORS note in keepalive.py for discussion of
+. ## the changes in python 2.4
+[ 1
+. if self.python_version >= [2, 4]:
+] 1
+{ 1
+. if self.python_version >= (2, 4):
+}
+. self.assertRaises(URLError, self.opener.open, ref_404)
+. else:
+. fo = self.opener.open(ref_404)
+. data = fo.read()
+. fo.close()
+. self.assertEqual((fo.status, fo.reason), (404, 'Not Found'))
+.
+. def test_403_handler_on(self):
+. "test that 403 works with fancy handler"
+. keepalive.HANDLE_ERRORS = 1
+. self.assertRaises(URLError, self.opener.open, ref_403)
+.
+. def test_403_handler_off(self):
+. "test that 403 works without fancy handler"
+. keepalive.HANDLE_ERRORS = 0
+. ## see the HANDLE_ERRORS note in keepalive.py for discussion of
+. ## the changes in python 2.4
+[ 1
+. if self.python_version >= [2, 4]:
+] 1
+{ 1
+. if self.python_version >= (2, 4):
+}
+. self.assertRaises(URLError, self.opener.open, ref_403)
+. else:
+. fo = self.opener.open(ref_403)
+. data = fo.read()
+. fo.close()
+. self.assertEqual((fo.status, fo.reason), (403, 'Forbidden'))
+.
+. class DroppedConnectionTests(TestCase):
+. def setUp(self):
+. self.kh = keepalive.HTTPHandler()
+. self.opener = urllib2.build_opener(self.kh)
+[ 1
+. self.snarfed_logs = []
+. self.dbp = keepalive.DBPRINT
+. keepalive.DBPRINT = self.logsnarf
+. keepalive.DEBUG = 1
+.
+. def tearDown(self):
+. self.kh.close_all()
+. keepalive.DBPRINT = self.dbp
+. keepalive.DEBUG = 0
+.
+. def logsnarf(self, message):
+. self.snarfed_logs.append(message)
+] 1
+{ 1
+. self.db = keepalive.DEBUG
+. keepalive.DEBUG = FakeLogger()
+.
+. def tearDown(self):
+. self.kh.close_all()
+. keepalive.DEBUG = self.db
+}
+.
+. def test_dropped_connection(self):
+. "testing connection restarting (20-second delay, ctrl-c to skip)"
+. # the server has a 15-second keepalive timeout (the apache default)
+. fo = self.opener.open(ref_http)
+. data1 = fo.read()
+. fo.close()
+.
+. try: time.sleep(20)
+. except KeyboardInterrupt: self.skip()
+.
+. fo = self.opener.open(ref_http)
+. data2 = fo.read()
+. fo.close()
+.
+. reference_logs = [
+. 'creating new connection to www.linux.duke.edu',
+. 'STATUS: 200, OK',
+. 'failed to re-use connection to www.linux.duke.edu',
+. 'creating new connection to www.linux.duke.edu',
+. 'STATUS: 200, OK'
+. ]
+. self.assert_(data1 == data2)
+. l = [ re.sub(r'\s+\(-?\d+\)$', r'', line) for \
+[ 1
+. line in self.snarfed_logs ]
+] 1
+{ 1
+. line in keepalive.DEBUG.logs ]
+}
+. self.assert_(l == reference_logs)
+.
+. class ThreadingTests(TestCase):
+. def setUp(self):
+. self.kh = keepalive.HTTPHandler()
+. self.opener = urllib2.build_opener(self.kh)
+. self.snarfed_logs = []
+[ 1
+. self.dbp = keepalive.DBPRINT
+. keepalive.DBPRINT = self.logsnarf
+. keepalive.DEBUG = 1
+.
+. def tearDown(self):
+. self.kh.close_all()
+. keepalive.DBPRINT = self.dbp
+. keepalive.DEBUG = 0
+.
+. def logsnarf(self, message):
+. self.snarfed_logs.append(message)
+] 1
+{ 1
+. self.db = keepalive.DEBUG
+. keepalive.DEBUG = FakeLogger()
+.
+. def tearDown(self):
+. self.kh.close_all()
+. keepalive.DEBUG = self.db
+}
+.
+. def test_basic_threading(self):
+. "use 3 threads, each getting a file 4 times"
+. numthreads = 3
+. cond = threading.Condition()
+. self.threads = []
+. for i in range(numthreads):
+. t = Fetcher(self.opener, ref_http, 4)
+. t.start()
+. self.threads.append(t)
+. for t in self.threads: t.join()
+[ 1
+. l = [ re.sub(r'\s+\(-?\d+\)$', r'', line) for line in self.snarfed_logs ]
+] 1
+{ 1
+. l = [ re.sub(r'\s+\(-?\d+\)$', r'', line) for \
+. line in keepalive.DEBUG.logs ]
+}
+. l.sort()
+. creating = ['creating new connection to www.linux.duke.edu'] * 3
+. status = ['STATUS: 200, OK'] * 12
+. reuse = ['re-using connection to www.linux.duke.edu'] * 9
+. reference_logs = creating + status + reuse
+. reference_logs.sort()
+[ 1
+. #print '--------------------'
+. #for log in l: print log
+. #print '--------------------'
+. #for log in reference_logs: print log
+. #print '--------------------'
+] 1
+{ 1
+. if 0:
+. print '--------------------'
+. for log in l: print log
+. print '--------------------'
+. for log in reference_logs: print log
+. print '--------------------'
+}
+. self.assert_(l == reference_logs)
+.
+. class Fetcher(threading.Thread):
+. def __init__(self, opener, url, num):
+. threading.Thread.__init__(self)
+. self.opener = opener
+. self.url = url
+. self.num = num
+.
+. def run(self):
+. for i in range(self.num):
+. fo = self.opener.open(self.url)
+. data = fo.read()
+. fo.close()
+.
+. def suite():
+. tl = TestLoader()
+. return tl.loadTestsFromModule(sys.modules[__name__])
+.
+. if __name__ == '__main__':
+. runner = TextTestRunner(stream=sys.stdout,descriptions=1,verbosity=2)
+. runner.run(suite())
+.
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/f4/test_mirror.py-20051228065045-e8ed8cc88f29bd4a.weave
+++ urlgrabber-2.9.7/.bzr/weaves/f4/test_mirror.py-20051228065045-e8ed8cc88f29bd4a.weave
@@ -0,0 +1,344 @@
+# bzr weave file v5
+i
+1 ac372ad775435a897f189ae1262b5e879b65ca66
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+i 0
+1 8463302f9ae7296c66cf49fe7ec5c7cc85739075
+n wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
+
+w
+{ 0
+. #!/usr/bin/python -t
+.
+. # This library is free software; you can redistribute it and/or
+. # modify it under the terms of the GNU Lesser General Public
+. # License as published by the Free Software Foundation; either
+. # version 2.1 of the License, or (at your option) any later version.
+. #
+. # This library is distributed in the hope that it will be useful,
+. # but WITHOUT ANY WARRANTY; without even the implied warranty of
+. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+. # Lesser General Public License for more details.
+. #
+. # You should have received a copy of the GNU Lesser General Public
+. # License along with this library; if not, write to the
+. # Free Software Foundation, Inc.,
+. # 59 Temple Place, Suite 330,
+. # Boston, MA 02111-1307 USA
+.
+. # This file is part of urlgrabber, a high-level cross-protocol url-grabber
+. # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+.
+. """mirror.py tests"""
+.
+[ 1
+. # $Id: test_mirror.py,v 1.10 2005/02/14 21:55:06 mstenner Exp $
+] 1
+{ 1
+. # $Id: test_mirror.py,v 1.12 2005/10/22 21:57:27 mstenner Exp $
+}
+.
+. import sys
+. import os
+. import string, tempfile, random, cStringIO, os
+.
+. import urlgrabber.grabber
+. from urlgrabber.grabber import URLGrabber, URLGrabError
+. import urlgrabber.mirror
+. from urlgrabber.mirror import MirrorGroup, MGRandomStart, MGRandomOrder
+.
+. from base_test_code import *
+{ 1
+.
+. class FakeLogger:
+. def __init__(self):
+. self.logs = []
+. def debug(self, msg, *args):
+. self.logs.append(msg % args)
+. warn = warning = info = error = debug
+}
+.
+. class BasicTests(TestCase):
+. def setUp(self):
+. self.g = URLGrabber()
+. fullmirrors = [base_mirror_url + m + '/' for m in good_mirrors]
+. self.mg = MirrorGroup(self.g, fullmirrors)
+.
+. def test_urlgrab(self):
+. """MirrorGroup.urlgrab"""
+. filename = tempfile.mktemp()
+. url = 'short_reference'
+. self.mg.urlgrab(url, filename)
+.
+. fo = open(filename)
+. data = fo.read()
+. fo.close()
+.
+. self.assertEqual(data, short_reference_data)
+.
+. def test_urlread(self):
+. """MirrorGroup.urlread"""
+. url = 'short_reference'
+. data = self.mg.urlread(url)
+.
+. self.assertEqual(data, short_reference_data)
+.
+. def test_urlopen(self):
+. """MirrorGroup.urlopen"""
+. url = 'short_reference'
+. fo = self.mg.urlopen(url)
+. data = fo.read()
+. fo.close()
+.
+. self.assertEqual(data, short_reference_data)
+.
+. class SubclassTests(TestCase):
+. def setUp(self):
+. self.g = URLGrabber()
+. self.fullmirrors = [base_mirror_url + m + '/' for m in good_mirrors]
+.
+. def fetchwith(self, mgclass):
+. self.mg = mgclass(self.g, self.fullmirrors)
+.
+. filename = tempfile.mktemp()
+. url = 'short_reference'
+. self.mg.urlgrab(url, filename)
+.
+. fo = open(filename)
+. data = fo.read()
+. fo.close()
+.
+. self.assertEqual(data, short_reference_data)
+.
+. def test_MGRandomStart(self):
+. "MGRandomStart.urlgrab"
+. self.fetchwith(MGRandomStart)
+.
+. def test_MGRandomOrder(self):
+. "MGRandomOrder.urlgrab"
+. self.fetchwith(MGRandomOrder)
+.
+. class CallbackTests(TestCase):
+. def setUp(self):
+. self.g = URLGrabber()
+. fullmirrors = [base_mirror_url + m + '/' for m in \
+. (bad_mirrors + good_mirrors)]
+. self.mg = MirrorGroup(self.g, fullmirrors)
+.
+. def test_failure_callback(self):
+. "test that MG executes the failure callback correctly"
+. tricky_list = []
+. def failure_callback(cb_obj, tl):
+. tl.append(str(cb_obj.exception))
+. self.mg.failure_callback = failure_callback, (tricky_list, ), {}
+. data = self.mg.urlread('reference')
+. self.assert_(data == reference_data)
+[ 1
+. self.assertEquals(tricky_list[0][:33],
+. '[Errno 4] IOError: HTTP Error 403')
+] 1
+{ 1
+. self.assertEquals(tricky_list[0][:25],
+. '[Errno 14] HTTP Error 403')
+}
+.
+. def test_callback_reraise(self):
+. "test that the callback can correctly re-raise the exception"
+. def failure_callback(cb_obj): raise cb_obj.exception
+. self.mg.failure_callback = failure_callback
+. self.assertRaises(URLGrabError, self.mg.urlread, 'reference')
+.
+. class BadMirrorTests(TestCase):
+. def setUp(self):
+. self.g = URLGrabber()
+. fullmirrors = [base_mirror_url + m + '/' for m in bad_mirrors]
+. self.mg = MirrorGroup(self.g, fullmirrors)
+.
+. def test_simple_grab(self):
+. """test that a bad mirror raises URLGrabError"""
+. filename = tempfile.mktemp()
+. url = 'reference'
+. self.assertRaises(URLGrabError, self.mg.urlgrab, url, filename)
+.
+. class FailoverTests(TestCase):
+. def setUp(self):
+. self.g = URLGrabber()
+. fullmirrors = [base_mirror_url + m + '/' for m in \
+. (bad_mirrors + good_mirrors)]
+. self.mg = MirrorGroup(self.g, fullmirrors)
+.
+. def test_simple_grab(self):
+. """test that a the MG fails over past a bad mirror"""
+. filename = tempfile.mktemp()
+. url = 'reference'
+. elist = []
+. def cb(e, elist=elist): elist.append(e)
+. self.mg.urlgrab(url, filename, failure_callback=cb)
+.
+. fo = open(filename)
+. contents = fo.read()
+. fo.close()
+.
+. # first be sure that the first mirror failed and that the
+. # callback was called
+. self.assertEqual(len(elist), 1)
+. # now be sure that the second mirror succeeded and the correct
+. # data was returned
+. self.assertEqual(contents, reference_data)
+.
+. class FakeGrabber:
+. def __init__(self, resultlist=None):
+. self.resultlist = resultlist or []
+. self.index = 0
+. self.calls = []
+.
+. def urlgrab(self, url, filename=None, **kwargs):
+. self.calls.append( (url, filename) )
+. res = self.resultlist[self.index]
+. self.index += 1
+. if isinstance(res, Exception): raise res
+. else: return res
+.
+. class ActionTests(TestCase):
+. def setUp(self):
+. self.snarfed_logs = []
+[ 1
+. self.debug = urlgrabber.mirror.DEBUG
+. urlgrabber.mirror.DEBUG = 1
+. self.dbprint = urlgrabber.mirror.DBPRINT
+. urlgrabber.mirror.DBPRINT = self.logsnarf
+] 1
+{ 1
+. self.db = urlgrabber.mirror.DEBUG
+. urlgrabber.mirror.DEBUG = FakeLogger()
+}
+. self.mirrors = ['a', 'b', 'c', 'd', 'e', 'f']
+. self.g = FakeGrabber([URLGrabError(3), URLGrabError(3), 'filename'])
+. self.mg = MirrorGroup(self.g, self.mirrors)
+.
+[ 1
+. def logsnarf(self, message):
+. self.snarfed_logs.append(message)
+.
+] 1
+. def tearDown(self):
+[ 1
+. urlgrabber.mirror.DEBUG = self.debug
+. urlgrabber.mirror.DBPRINT = self.dbprint
+] 1
+{ 1
+. urlgrabber.mirror.DEBUG = self.db
+}
+.
+. def test_defaults(self):
+. 'test default action policy'
+. self.mg.urlgrab('somefile')
+. expected_calls = [ (m + '/' + 'somefile', None) \
+. for m in self.mirrors[:3] ]
+. expected_logs = \
+. ['MIRROR: trying somefile -> a/somefile',
+. 'MIRROR: failed',
+. 'GR mirrors: [b c d e f] 0',
+. 'MAIN mirrors: [a b c d e f] 1',
+. 'MIRROR: trying somefile -> b/somefile',
+. 'MIRROR: failed',
+. 'GR mirrors: [c d e f] 0',
+. 'MAIN mirrors: [a b c d e f] 2',
+. 'MIRROR: trying somefile -> c/somefile']
+.
+. self.assertEquals(self.g.calls, expected_calls)
+[ 1
+. self.assertEquals(self.snarfed_logs, expected_logs)
+] 1
+{ 1
+. self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
+}
+.
+. def test_instance_action(self):
+. 'test the effects of passed-in default_action'
+. self.mg.default_action = {'remove_master': 1}
+. self.mg.urlgrab('somefile')
+. expected_calls = [ (m + '/' + 'somefile', None) \
+. for m in self.mirrors[:3] ]
+. expected_logs = \
+. ['MIRROR: trying somefile -> a/somefile',
+. 'MIRROR: failed',
+. 'GR mirrors: [b c d e f] 0',
+. 'MAIN mirrors: [b c d e f] 0',
+. 'MIRROR: trying somefile -> b/somefile',
+. 'MIRROR: failed',
+. 'GR mirrors: [c d e f] 0',
+. 'MAIN mirrors: [c d e f] 0',
+. 'MIRROR: trying somefile -> c/somefile']
+.
+. self.assertEquals(self.g.calls, expected_calls)
+[ 1
+. self.assertEquals(self.snarfed_logs, expected_logs)
+] 1
+{ 1
+. self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
+}
+.
+. def test_method_action(self):
+. 'test the effects of method-level default_action'
+. self.mg.urlgrab('somefile', default_action={'remove_master': 1})
+. expected_calls = [ (m + '/' + 'somefile', None) \
+. for m in self.mirrors[:3] ]
+. expected_logs = \
+. ['MIRROR: trying somefile -> a/somefile',
+. 'MIRROR: failed',
+. 'GR mirrors: [b c d e f] 0',
+. 'MAIN mirrors: [b c d e f] 0',
+. 'MIRROR: trying somefile -> b/somefile',
+. 'MIRROR: failed',
+. 'GR mirrors: [c d e f] 0',
+. 'MAIN mirrors: [c d e f] 0',
+. 'MIRROR: trying somefile -> c/somefile']
+.
+. self.assertEquals(self.g.calls, expected_calls)
+[ 1
+. self.assertEquals(self.snarfed_logs, expected_logs)
+] 1
+{ 1
+. self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
+}
+.
+.
+. def callback(self, e): return {'fail': 1}
+.
+. def test_callback_action(self):
+. 'test the effects of a callback-returned action'
+. self.assertRaises(URLGrabError, self.mg.urlgrab, 'somefile',
+. failure_callback=self.callback)
+. expected_calls = [ (m + '/' + 'somefile', None) \
+. for m in self.mirrors[:1] ]
+. expected_logs = \
+. ['MIRROR: trying somefile -> a/somefile',
+. 'MIRROR: failed',
+. 'GR mirrors: [b c d e f] 0',
+. 'MAIN mirrors: [a b c d e f] 1']
+.
+. self.assertEquals(self.g.calls, expected_calls)
+[ 1
+. self.assertEquals(self.snarfed_logs, expected_logs)
+] 1
+{ 1
+. self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
+}
+.
+.
+. def suite():
+. tl = TestLoader()
+. return tl.loadTestsFromModule(sys.modules[__name__])
+.
+. if __name__ == '__main__':
+[ 1
+. urlgrabber.grabber.DEBUG = 0
+. urlgrabber.mirror.DEBUG = 0
+] 1
+. runner = TextTestRunner(stream=sys.stdout,descriptions=1,verbosity=2)
+. runner.run(suite())
+.
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/3b/threading-20051228065045-66a60b5b22eb786f.weave
+++ urlgrabber-2.9.7/.bzr/weaves/3b/threading-20051228065045-66a60b5b22eb786f.weave
@@ -0,0 +1,7 @@
+# bzr weave file v5
+i
+1 da39a3ee5e6b4b0d3255bfef95601890afd80709
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/2e/batchgrabber.py-20051228065045-125bac347714ca01.weave
+++ urlgrabber-2.9.7/.bzr/weaves/2e/batchgrabber.py-20051228065045-125bac347714ca01.weave
@@ -0,0 +1,119 @@
+# bzr weave file v5
+i
+1 025003b82c68255a04427c6d1e2f7dfa774fce3a
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+{ 0
+. # This library is free software; you can redistribute it and/or
+. # modify it under the terms of the GNU Lesser General Public
+. # License as published by the Free Software Foundation; either
+. # version 2.1 of the License, or (at your option) any later version.
+. #
+. # This library is distributed in the hope that it will be useful,
+. # but WITHOUT ANY WARRANTY; without even the implied warranty of
+. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+. # Lesser General Public License for more details.
+. #
+. # You should have received a copy of the GNU Lesser General Public
+. # License along with this library; if not, write to the
+. # Free Software Foundation, Inc.,
+. # 59 Temple Place, Suite 330,
+. # Boston, MA 02111-1307 USA
+.
+. # This file is part of urlgrabber, a high-level cross-protocol url-grabber
+. # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+.
+. """Module for testing urlgrabber under multiple threads.
+.
+. This module can be used from the command line. Each argument is
+. a URL to grab.
+.
+. The BatchURLGrabber class has an interface similar to URLGrabber
+. but instead of pulling files when urlgrab is called, the request
+. is queued. Calling BatchURLGrabber.batchgrab causes all files to
+. be pulled in multiple threads.
+.
+. """
+.
+. import os.path, sys
+. if __name__ == '__main__':
+. print os.path.dirname(sys.argv[0])
+. sys.path.insert(0, (os.path.dirname(sys.argv[0]) or '.') + '/../..')
+.
+. from threading import Thread, Semaphore
+. from urlgrabber.grabber import URLGrabber, URLGrabError
+. from urlgrabber.progress import MultiFileMeter, TextMultiFileMeter
+. from time import sleep, time
+.
+. DEBUG=0
+.
+. class BatchURLGrabber:
+. def __init__(self, maxthreads=5, **kwargs):
+. self.maxthreads = 5
+. self.grabber = URLGrabber(**kwargs)
+. self.queue = []
+. self.threads = []
+. self.sem = Semaphore()
+.
+. def urlgrab(self, url, filename=None, **kwargs):
+. self.queue.append( (url, filename, kwargs) )
+.
+. def batchgrab(self):
+. if hasattr(self.grabber.opts.progress_obj, 'start'):
+. self.grabber.opts.progress_obj.start(len(self.queue))
+. while self.queue or self.threads:
+. if self.queue and (len(self.threads) < self.maxthreads):
+. url, filename, kwargs = self.queue[0]
+. del self.queue[0]
+. thread = Worker(self, url, filename, kwargs)
+. self.threads.append(thread)
+. if DEBUG: print "starting worker: " + url
+. thread.start()
+. else:
+. for t in self.threads:
+. if not t.isAlive():
+. if DEBUG: print "cleaning up worker: " + t.url
+. self.threads.remove(t)
+. #if len(self.threads) == self.maxthreads:
+. # sleep(0.2)
+. sleep(0.2)
+.
+. class Worker(Thread):
+. def __init__(self, parent, url, filename, kwargs):
+. Thread.__init__(self)
+. self.parent = parent
+. self.url = url
+. self.filename = filename
+. self.kwargs = kwargs
+.
+. def run(self):
+. if DEBUG: print "worker thread started."
+. grabber = self.parent.grabber
+. progress_obj = grabber.opts.progress_obj
+. if isinstance(progress_obj, MultiFileMeter):
+. self.kwargs['progress_obj'] = progress_obj.newMeter()
+. try:
+. rslt = self.parent.grabber.urlgrab(self.url, self.filename, **self.kwargs)
+. except URLGrabError, e:
+. print '%s, %s' % (e, self.url)
+.
+. def main():
+. progress_obj = None
+. # uncomment to play with BatchProgressMeter (doesn't work right now)
+. # progress_obj = TextMultiFileMeter()
+. g = BatchURLGrabber(keepalive=1, progress_obj=progress_obj)
+. for arg in sys.argv[1:]:
+. g.urlgrab(arg)
+. if DEBUG: print "before batchgrab"
+. try:
+. g.batchgrab()
+. except KeyboardInterrupt:
+. sys.exit(1)
+.
+. if DEBUG: print "after batchgrab"
+.
+. if __name__ == '__main__':
+. main()
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/44/urlgrabber-20051228065045-f0893108ca7f0221.weave
+++ urlgrabber-2.9.7/.bzr/weaves/44/urlgrabber-20051228065045-f0893108ca7f0221.weave
@@ -0,0 +1,7 @@
+# bzr weave file v5
+i
+1 da39a3ee5e6b4b0d3255bfef95601890afd80709
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+w
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/76/__init__.py-20051228065045-0c846387e84b0c78.weave
+++ urlgrabber-2.9.7/.bzr/weaves/76/__init__.py-20051228065045-0c846387e84b0c78.weave
@@ -0,0 +1,77 @@
+# bzr weave file v5
+i
+1 31e5ed0254394e2733278f6371648f6b43b890db
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+i 0
+1 49d097cabb97a0e1904dbe75a4f0242d763ae76d
+n wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
+
+w
+{ 0
+. # This program is free software; you can redistribute it and/or modify
+. # it under the terms of the GNU General Public License as published by
+. # the Free Software Foundation; either version 2 of the License, or
+. # (at your option) any later version.
+. #
+. # This program is distributed in the hope that it will be useful,
+. # but WITHOUT ANY WARRANTY; without even the implied warranty of
+. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+. # GNU Library General Public License for more details.
+. #
+. # You should have received a copy of the GNU General Public License
+. # along with this program; if not, write to the Free Software
+. # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+.
+. # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+.
+[ 1
+. # $Id: __init__.py,v 1.14 2005/03/08 22:54:25 mstenner Exp $
+] 1
+{ 1
+. # $Id: __init__.py,v 1.15 2005/10/22 22:05:12 mstenner Exp $
+}
+.
+. """A high-level cross-protocol url-grabber.
+.
+. Using urlgrabber, data can be fetched in three basic ways:
+.
+. urlgrab(url) copy the file to the local filesystem
+. urlopen(url) open the remote file and return a file object
+. (like urllib2.urlopen)
+. urlread(url) return the contents of the file as a string
+.
+. When using these functions (or methods), urlgrabber supports the
+. following features:
+.
+. * identical behavior for http://, ftp://, and file:// urls
+. * http keepalive - faster downloads of many files by using
+. only a single connection
+. * byte ranges - fetch only a portion of the file
+. * reget - for a urlgrab, resume a partial download
+. * progress meters - the ability to report download progress
+. automatically, even when using urlopen!
+. * throttling - restrict bandwidth usage
+. * retries - automatically retry a download if it fails. The
+. number of retries and failure types are configurable.
+. * authenticated server access for http and ftp
+. * proxy support - support for authenticated http and ftp proxies
+. * mirror groups - treat a list of mirrors as a single source,
+. automatically switching mirrors if there is a failure.
+. """
+.
+[ 1
+. __version__ = '2.9.6'
+. __date__ = '2005/03/08'
+] 1
+{ 1
+. __version__ = '2.9.7'
+. __date__ = '2005/10/22'
+}
+. __author__ = 'Michael D. Stenner , ' \
+. 'Ryan Tomayko '
+. __url__ = 'http://linux.duke.edu/projects/urlgrabber/'
+.
+. from grabber import urlgrab, urlopen, urlread
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/76/debian-20051231042924-0dc34beb2f68f9de.weave
+++ urlgrabber-2.9.7/.bzr/weaves/76/debian-20051231042924-0dc34beb2f68f9de.weave
@@ -0,0 +1,7 @@
+# bzr weave file v5
+i
+1 da39a3ee5e6b4b0d3255bfef95601890afd80709
+n wildfire@progsoc.org-20051231042934-0a9043755ce47970
+
+w
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/95/byterange.py-20051228065045-2ccea88899ef14a1.weave
+++ urlgrabber-2.9.7/.bzr/weaves/95/byterange.py-20051228065045-2ccea88899ef14a1.weave
@@ -0,0 +1,484 @@
+# bzr weave file v5
+i
+1 dd73f81b5531d542d381edd54e66e16a69879988
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+i 0
+1 a3b5c65a856f970abf925f6a9e6864f8f83cfc43
+n wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
+
+w
+{ 0
+. # This library is free software; you can redistribute it and/or
+. # modify it under the terms of the GNU Lesser General Public
+. # License as published by the Free Software Foundation; either
+. # version 2.1 of the License, or (at your option) any later version.
+. #
+. # This library is distributed in the hope that it will be useful,
+. # but WITHOUT ANY WARRANTY; without even the implied warranty of
+. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+. # Lesser General Public License for more details.
+. #
+. # You should have received a copy of the GNU Lesser General Public
+. # License along with this library; if not, write to the
+. # Free Software Foundation, Inc.,
+. # 59 Temple Place, Suite 330,
+. # Boston, MA 02111-1307 USA
+.
+. # This file is part of urlgrabber, a high-level cross-protocol url-grabber
+. # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+.
+[ 1
+. # $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
+] 1
+{ 1
+. # $Id: byterange.py,v 1.11 2005/10/22 21:57:28 mstenner Exp $
+}
+.
+. import os
+. import stat
+. import urllib
+. import urllib2
+. import rfc822
+{ 1
+.
+. DEBUG = None
+}
+.
+. try:
+. from cStringIO import StringIO
+. except ImportError, msg:
+. from StringIO import StringIO
+.
+. class RangeError(IOError):
+. """Error raised when an unsatisfiable range is requested."""
+. pass
+.
+. class HTTPRangeHandler(urllib2.BaseHandler):
+. """Handler that enables HTTP Range headers.
+.
+. This was extremely simple. The Range header is a HTTP feature to
+. begin with so all this class does is tell urllib2 that the
+. "206 Partial Content" reponse from the HTTP server is what we
+. expected.
+.
+. Example:
+. import urllib2
+. import byterange
+.
+. range_handler = range.HTTPRangeHandler()
+. opener = urllib2.build_opener(range_handler)
+.
+. # install it
+. urllib2.install_opener(opener)
+.
+. # create Request and set Range header
+. req = urllib2.Request('http://www.python.org/')
+. req.header['Range'] = 'bytes=30-50'
+. f = urllib2.urlopen(req)
+. """
+.
+. def http_error_206(self, req, fp, code, msg, hdrs):
+. # 206 Partial Content Response
+. r = urllib.addinfourl(fp, hdrs, req.get_full_url())
+. r.code = code
+. r.msg = msg
+. return r
+.
+. def http_error_416(self, req, fp, code, msg, hdrs):
+. # HTTP's Range Not Satisfiable error
+. raise RangeError('Requested Range Not Satisfiable')
+.
+. class RangeableFileObject:
+. """File object wrapper to enable raw range handling.
+. This was implemented primarilary for handling range
+. specifications for file:// urls. This object effectively makes
+. a file object look like it consists only of a range of bytes in
+. the stream.
+.
+. Examples:
+. # expose 10 bytes, starting at byte position 20, from
+. # /etc/aliases.
+. >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
+. # seek seeks within the range (to position 23 in this case)
+. >>> fo.seek(3)
+. # tell tells where your at _within the range_ (position 3 in
+. # this case)
+. >>> fo.tell()
+. # read EOFs if an attempt is made to read past the last
+. # byte in the range. the following will return only 7 bytes.
+. >>> fo.read(30)
+. """
+.
+. def __init__(self, fo, rangetup):
+. """Create a RangeableFileObject.
+. fo -- a file like object. only the read() method need be
+. supported but supporting an optimized seek() is
+. preferable.
+. rangetup -- a (firstbyte,lastbyte) tuple specifying the range
+. to work over.
+. The file object provided is assumed to be at byte offset 0.
+. """
+. self.fo = fo
+. (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup)
+. self.realpos = 0
+. self._do_seek(self.firstbyte)
+.
+. def __getattr__(self, name):
+. """This effectively allows us to wrap at the instance level.
+. Any attribute not found in _this_ object will be searched for
+. in self.fo. This includes methods."""
+. if hasattr(self.fo, name):
+. return getattr(self.fo, name)
+. raise AttributeError, name
+.
+. def tell(self):
+. """Return the position within the range.
+. This is different from fo.seek in that position 0 is the
+. first byte position of the range tuple. For example, if
+. this object was created with a range tuple of (500,899),
+. tell() will return 0 when at byte position 500 of the file.
+. """
+. return (self.realpos - self.firstbyte)
+.
+. def seek(self,offset,whence=0):
+. """Seek within the byte range.
+. Positioning is identical to that described under tell().
+. """
+. assert whence in (0, 1, 2)
+. if whence == 0: # absolute seek
+. realoffset = self.firstbyte + offset
+. elif whence == 1: # relative seek
+. realoffset = self.realpos + offset
+. elif whence == 2: # absolute from end of file
+. # XXX: are we raising the right Error here?
+. raise IOError('seek from end of file not supported.')
+.
+. # do not allow seek past lastbyte in range
+. if self.lastbyte and (realoffset >= self.lastbyte):
+. realoffset = self.lastbyte
+.
+. self._do_seek(realoffset - self.realpos)
+.
+. def read(self, size=-1):
+. """Read within the range.
+. This method will limit the size read based on the range.
+. """
+. size = self._calc_read_size(size)
+. rslt = self.fo.read(size)
+. self.realpos += len(rslt)
+. return rslt
+.
+. def readline(self, size=-1):
+. """Read lines within the range.
+. This method will limit the size read based on the range.
+. """
+. size = self._calc_read_size(size)
+. rslt = self.fo.readline(size)
+. self.realpos += len(rslt)
+. return rslt
+.
+. def _calc_read_size(self, size):
+. """Handles calculating the amount of data to read based on
+. the range.
+. """
+. if self.lastbyte:
+. if size > -1:
+. if ((self.realpos + size) >= self.lastbyte):
+. size = (self.lastbyte - self.realpos)
+. else:
+. size = (self.lastbyte - self.realpos)
+. return size
+.
+. def _do_seek(self,offset):
+. """Seek based on whether wrapped object supports seek().
+. offset is relative to the current position (self.realpos).
+. """
+. assert offset >= 0
+. if not hasattr(self.fo, 'seek'):
+. self._poor_mans_seek(offset)
+. else:
+. self.fo.seek(self.realpos + offset)
+. self.realpos+= offset
+.
+. def _poor_mans_seek(self,offset):
+. """Seek by calling the wrapped file objects read() method.
+. This is used for file like objects that do not have native
+. seek support. The wrapped objects read() method is called
+. to manually seek to the desired position.
+. offset -- read this number of bytes from the wrapped
+. file object.
+. raise RangeError if we encounter EOF before reaching the
+. specified offset.
+. """
+. pos = 0
+. bufsize = 1024
+. while pos < offset:
+. if (pos + bufsize) > offset:
+. bufsize = offset - pos
+. buf = self.fo.read(bufsize)
+. if len(buf) != bufsize:
+. raise RangeError('Requested Range Not Satisfiable')
+. pos+= bufsize
+.
+. class FileRangeHandler(urllib2.FileHandler):
+. """FileHandler subclass that adds Range support.
+. This class handles Range headers exactly like an HTTP
+. server would.
+. """
+. def open_local_file(self, req):
+. import mimetypes
+. import mimetools
+. host = req.get_host()
+. file = req.get_selector()
+. localfile = urllib.url2pathname(file)
+. stats = os.stat(localfile)
+. size = stats[stat.ST_SIZE]
+. modified = rfc822.formatdate(stats[stat.ST_MTIME])
+. mtype = mimetypes.guess_type(file)[0]
+. if host:
+. host, port = urllib.splitport(host)
+. if port or socket.gethostbyname(host) not in self.get_names():
+[ 1
+. raise URLError('file not on local host')
+] 1
+{ 1
+. raise urllib2.URLError('file not on local host')
+}
+. fo = open(localfile,'rb')
+. brange = req.headers.get('Range',None)
+. brange = range_header_to_tuple(brange)
+. assert brange != ()
+. if brange:
+. (fb,lb) = brange
+. if lb == '': lb = size
+. if fb < 0 or fb > size or lb > size:
+. raise RangeError('Requested Range Not Satisfiable')
+. size = (lb - fb)
+. fo = RangeableFileObject(fo, (fb,lb))
+. headers = mimetools.Message(StringIO(
+. 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
+. (mtype or 'text/plain', size, modified)))
+. return urllib.addinfourl(fo, headers, 'file:'+file)
+.
+.
+. # FTP Range Support
+. # Unfortunately, a large amount of base FTP code had to be copied
+. # from urllib and urllib2 in order to insert the FTP REST command.
+. # Code modifications for range support have been commented as
+. # follows:
+. # -- range support modifications start/end here
+.
+. from urllib import splitport, splituser, splitpasswd, splitattr, \
+. unquote, addclosehook, addinfourl
+. import ftplib
+. import socket
+. import sys
+. import ftplib
+. import mimetypes
+. import mimetools
+.
+. class FTPRangeHandler(urllib2.FTPHandler):
+. def ftp_open(self, req):
+. host = req.get_host()
+. if not host:
+. raise IOError, ('ftp error', 'no host given')
+. host, port = splitport(host)
+. if port is None:
+. port = ftplib.FTP_PORT
+.
+. # username/password handling
+. user, host = splituser(host)
+. if user:
+. user, passwd = splitpasswd(user)
+. else:
+. passwd = None
+. host = unquote(host)
+. user = unquote(user or '')
+. passwd = unquote(passwd or '')
+.
+. try:
+. host = socket.gethostbyname(host)
+. except socket.error, msg:
+[ 1
+. raise URLError(msg)
+] 1
+{ 1
+. raise urllib2.URLError(msg)
+}
+. path, attrs = splitattr(req.get_selector())
+. dirs = path.split('/')
+. dirs = map(unquote, dirs)
+. dirs, file = dirs[:-1], dirs[-1]
+. if dirs and not dirs[0]:
+. dirs = dirs[1:]
+. try:
+. fw = self.connect_ftp(user, passwd, host, port, dirs)
+. type = file and 'I' or 'D'
+. for attr in attrs:
+. attr, value = splitattr(attr)
+. if attr.lower() == 'type' and \
+. value in ('a', 'A', 'i', 'I', 'd', 'D'):
+. type = value.upper()
+.
+. # -- range support modifications start here
+. rest = None
+. range_tup = range_header_to_tuple(req.headers.get('Range',None))
+. assert range_tup != ()
+. if range_tup:
+. (fb,lb) = range_tup
+. if fb > 0: rest = fb
+. # -- range support modifications end here
+.
+. fp, retrlen = fw.retrfile(file, type, rest)
+.
+. # -- range support modifications start here
+. if range_tup:
+. (fb,lb) = range_tup
+. if lb == '':
+. if retrlen is None or retrlen == 0:
+. raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
+. lb = retrlen
+. retrlen = lb - fb
+. if retrlen < 0:
+. # beginning of range is larger than file
+. raise RangeError('Requested Range Not Satisfiable')
+. else:
+. retrlen = lb - fb
+. fp = RangeableFileObject(fp, (0,retrlen))
+. # -- range support modifications end here
+.
+. headers = ""
+. mtype = mimetypes.guess_type(req.get_full_url())[0]
+. if mtype:
+. headers += "Content-Type: %s\n" % mtype
+. if retrlen is not None and retrlen >= 0:
+. headers += "Content-Length: %d\n" % retrlen
+. sf = StringIO(headers)
+. headers = mimetools.Message(sf)
+. return addinfourl(fp, headers, req.get_full_url())
+. except ftplib.all_errors, msg:
+. raise IOError, ('ftp error', msg), sys.exc_info()[2]
+.
+. def connect_ftp(self, user, passwd, host, port, dirs):
+. fw = ftpwrapper(user, passwd, host, port, dirs)
+. return fw
+.
+. class ftpwrapper(urllib.ftpwrapper):
+. # range support note:
+. # this ftpwrapper code is copied directly from
+. # urllib. The only enhancement is to add the rest
+. # argument and pass it on to ftp.ntransfercmd
+. def retrfile(self, file, type, rest=None):
+. self.endtransfer()
+. if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
+. else: cmd = 'TYPE ' + type; isdir = 0
+. try:
+. self.ftp.voidcmd(cmd)
+. except ftplib.all_errors:
+. self.init()
+. self.ftp.voidcmd(cmd)
+. conn = None
+. if file and not isdir:
+. # Use nlst to see if the file exists at all
+. try:
+. self.ftp.nlst(file)
+. except ftplib.error_perm, reason:
+. raise IOError, ('ftp error', reason), sys.exc_info()[2]
+. # Restore the transfer mode!
+. self.ftp.voidcmd(cmd)
+. # Try to retrieve as a file
+. try:
+. cmd = 'RETR ' + file
+. conn = self.ftp.ntransfercmd(cmd, rest)
+. except ftplib.error_perm, reason:
+. if str(reason)[:3] == '501':
+. # workaround for REST not supported error
+. fp, retrlen = self.retrfile(file, type)
+. fp = RangeableFileObject(fp, (rest,''))
+. return (fp, retrlen)
+. elif str(reason)[:3] != '550':
+. raise IOError, ('ftp error', reason), sys.exc_info()[2]
+. if not conn:
+. # Set transfer mode to ASCII!
+. self.ftp.voidcmd('TYPE A')
+. # Try a directory listing
+. if file: cmd = 'LIST ' + file
+. else: cmd = 'LIST'
+. conn = self.ftp.ntransfercmd(cmd)
+. self.busy = 1
+. # Pass back both a suitably decorated object and a retrieval length
+. return (addclosehook(conn[0].makefile('rb'),
+. self.endtransfer), conn[1])
+.
+.
+. ####################################################################
+. # Range Tuple Functions
+. # XXX: These range tuple functions might go better in a class.
+.
+. _rangere = None
+. def range_header_to_tuple(range_header):
+. """Get a (firstbyte,lastbyte) tuple from a Range header value.
+.
+. Range headers have the form "bytes=-". This
+. function pulls the firstbyte and lastbyte values and returns
+. a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
+. the header value, it is returned as an empty string in the
+. tuple.
+.
+. Return None if range_header is None
+. Return () if range_header does not conform to the range spec
+. pattern.
+.
+. """
+. global _rangere
+. if range_header is None: return None
+. if _rangere is None:
+. import re
+. _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
+. match = _rangere.match(range_header)
+. if match:
+. tup = range_tuple_normalize(match.group(1,2))
+. if tup and tup[1]:
+. tup = (tup[0],tup[1]+1)
+. return tup
+. return ()
+.
+. def range_tuple_to_header(range_tup):
+. """Convert a range tuple to a Range header value.
+. Return a string of the form "bytes=-" or None
+. if no range is needed.
+. """
+. if range_tup is None: return None
+. range_tup = range_tuple_normalize(range_tup)
+. if range_tup:
+. if range_tup[1]:
+. range_tup = (range_tup[0],range_tup[1] - 1)
+. return 'bytes=%s-%s' % range_tup
+.
+. def range_tuple_normalize(range_tup):
+. """Normalize a (first_byte,last_byte) range tuple.
+. Return a tuple whose first element is guaranteed to be an int
+. and whose second element will be '' (meaning: the last byte) or
+. an int. Finally, return None if the normalized tuple == (0,'')
+. as that is equivelant to retrieving the entire file.
+. """
+. if range_tup is None: return None
+. # handle first byte
+. fb = range_tup[0]
+. if fb in (None,''): fb = 0
+. else: fb = int(fb)
+. # handle last byte
+. try: lb = range_tup[1]
+. except IndexError: lb = ''
+. else:
+. if lb is None: lb = ''
+. elif lb != '': lb = int(lb)
+. # check if range is over the entire file
+. if (fb,lb) == (0,''): return None
+. # check that the range is valid
+. if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb))
+. return (fb,lb)
+.
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/fe/grabber.py-20051228065045-af5c0e621ec98bbe.weave
+++ urlgrabber-2.9.7/.bzr/weaves/fe/grabber.py-20051228065045-af5c0e621ec98bbe.weave
@@ -0,0 +1,1480 @@
+# bzr weave file v5
+i
+1 046d9dbb9e6f6bde11a0b5e11483a892cfe17ad4
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+i 0
+1 b77dac5072dbd2337181abdf452393f7bb47ef27
+n wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
+
+w
+{ 0
+. # This library is free software; you can redistribute it and/or
+. # modify it under the terms of the GNU Lesser General Public
+. # License as published by the Free Software Foundation; either
+. # version 2.1 of the License, or (at your option) any later version.
+. #
+. # This library is distributed in the hope that it will be useful,
+. # but WITHOUT ANY WARRANTY; without even the implied warranty of
+. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+. # Lesser General Public License for more details.
+. #
+. # You should have received a copy of the GNU Lesser General Public
+. # License along with this library; if not, write to the
+. # Free Software Foundation, Inc.,
+. # 59 Temple Place, Suite 330,
+. # Boston, MA 02111-1307 USA
+.
+. # This file is part of urlgrabber, a high-level cross-protocol url-grabber
+. # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+.
+. """A high-level cross-protocol url-grabber.
+.
+. GENERAL ARGUMENTS (kwargs)
+.
+. Where possible, the module-level default is indicated, and legal
+. values are provided.
+.
+. copy_local = 0 [0|1]
+.
+. ignored except for file:// urls, in which case it specifies
+. whether urlgrab should still make a copy of the file, or simply
+. point to the existing copy. The module level default for this
+. option is 0.
+.
+. close_connection = 0 [0|1]
+.
+. tells URLGrabber to close the connection after a file has been
+. transfered. This is ignored unless the download happens with the
+. http keepalive handler (keepalive=1). Otherwise, the connection
+. is left open for further use. The module level default for this
+. option is 0 (keepalive connections will not be closed).
+.
+. keepalive = 1 [0|1]
+.
+. specifies whether keepalive should be used for HTTP/1.1 servers
+. that support it. The module level default for this option is 1
+. (keepalive is enabled).
+.
+. progress_obj = None
+.
+. a class instance that supports the following methods:
+. po.start(filename, url, basename, length, text)
+. # length will be None if unknown
+. po.update(read) # read == bytes read so far
+. po.end()
+.
+. text = None
+.
+. specifies an alternativ text item in the beginning of the progress
+. bar line. If not given, the basename of the file is used.
+.
+. throttle = 1.0
+.
+. a number - if it's an int, it's the bytes/second throttle limit.
+. If it's a float, it is first multiplied by bandwidth. If throttle
+. == 0, throttling is disabled. If None, the module-level default
+. (which can be set on default_grabber.throttle) is used. See
+. BANDWIDTH THROTTLING for more information.
+.
+. timeout = None
+.
+. a positive float expressing the number of seconds to wait for socket
+. operations. If the value is None or 0.0, socket operations will block
+. forever. Setting this option causes urlgrabber to call the settimeout
+. method on the Socket object used for the request. See the Python
+. documentation on settimeout for more information.
+. http://www.python.org/doc/current/lib/socket-objects.html
+.
+. bandwidth = 0
+.
+. the nominal max bandwidth in bytes/second. If throttle is a float
+. and bandwidth == 0, throttling is disabled. If None, the
+. module-level default (which can be set on
+. default_grabber.bandwidth) is used. See BANDWIDTH THROTTLING for
+. more information.
+.
+. range = None
+.
+. a tuple of the form (first_byte, last_byte) describing a byte
+. range to retrieve. Either or both of the values may set to
+. None. If first_byte is None, byte offset 0 is assumed. If
+. last_byte is None, the last byte available is assumed. Note that
+. the range specification is python-like in that (0,10) will yeild
+. the first 10 bytes of the file.
+.
+. If set to None, no range will be used.
+.
+. reget = None [None|'simple'|'check_timestamp']
+.
+. whether to attempt to reget a partially-downloaded file. Reget
+. only applies to .urlgrab and (obviously) only if there is a
+. partially downloaded file. Reget has two modes:
+.
+. 'simple' -- the local file will always be trusted. If there
+. are 100 bytes in the local file, then the download will always
+. begin 100 bytes into the requested file.
+.
+. 'check_timestamp' -- the timestamp of the server file will be
+. compared to the timestamp of the local file. ONLY if the
+. local file is newer than or the same age as the server file
+. will reget be used. If the server file is newer, or the
+. timestamp is not returned, the entire file will be fetched.
+.
+. NOTE: urlgrabber can do very little to verify that the partial
+. file on disk is identical to the beginning of the remote file.
+. You may want to either employ a custom "checkfunc" or simply avoid
+. using reget in situations where corruption is a concern.
+.
+. user_agent = 'urlgrabber/VERSION'
+.
+. a string, usually of the form 'AGENT/VERSION' that is provided to
+. HTTP servers in the User-agent header. The module level default
+. for this option is "urlgrabber/VERSION".
+.
+. http_headers = None
+.
+. a tuple of 2-tuples, each containing a header and value. These
+. will be used for http and https requests only. For example, you
+. can do
+. http_headers = (('Pragma', 'no-cache'),)
+.
+. ftp_headers = None
+.
+. this is just like http_headers, but will be used for ftp requests.
+.
+. proxies = None
+.
+. a dictionary that maps protocol schemes to proxy hosts. For
+. example, to use a proxy server on host "foo" port 3128 for http
+. and https URLs:
+. proxies={ 'http' : 'http://foo:3128', 'https' : 'http://foo:3128' }
+. note that proxy authentication information may be provided using
+. normal URL constructs:
+. proxies={ 'http' : 'http://user:host@foo:3128' }
+. Lastly, if proxies is None, the default environment settings will
+. be used.
+.
+. prefix = None
+.
+. a url prefix that will be prepended to all requested urls. For
+. example:
+. g = URLGrabber(prefix='http://foo.com/mirror/')
+. g.urlgrab('some/file.txt')
+. ## this will fetch 'http://foo.com/mirror/some/file.txt'
+. This option exists primarily to allow identical behavior to
+. MirrorGroup (and derived) instances. Note: a '/' will be inserted
+. if necessary, so you cannot specify a prefix that ends with a
+. partial file or directory name.
+.
+. opener = None
+.
+. Overrides the default urllib2.OpenerDirector provided to urllib2
+. when making requests. This option exists so that the urllib2
+. handler chain may be customized. Note that the range, reget,
+. proxy, and keepalive features require that custom handlers be
+. provided to urllib2 in order to function properly. If an opener
+. option is provided, no attempt is made by urlgrabber to ensure
+. chain integrity. You are responsible for ensuring that any
+. extension handlers are present if said features are required.
+.
+{ 1
+. data = None
+.
+. Only relevant for the HTTP family (and ignored for other
+. protocols), this allows HTTP POSTs. When the data kwarg is
+. present (and not None), an HTTP request will automatically become
+. a POST rather than GET. This is done by direct passthrough to
+. urllib2. If you use this, you may also want to set the
+. 'Content-length' and 'Content-type' headers with the http_headers
+. option. Note that python 2.2 handles the case of these
+. badly and if you do not use the proper case (shown here), your
+. values will be overridden with the defaults.
+.
+.
+}
+. RETRY RELATED ARGUMENTS
+.
+. retry = None
+.
+. the number of times to retry the grab before bailing. If this is
+. zero, it will retry forever. This was intentional... really, it
+. was :). If this value is not supplied or is supplied but is None
+. retrying does not occur.
+.
+. retrycodes = [-1,2,4,5,6,7]
+.
+. a sequence of errorcodes (values of e.errno) for which it should
+[ 1
+. retry. See the doc on URLGrabError for more details on
+. this. retrycodes defaults to [-1,2,4,5,6,7] if not specified
+. explicitly.
+.
+] 1
+{ 1
+. retry. See the doc on URLGrabError for more details on this. You
+. might consider modifying a copy of the default codes rather than
+. building yours from scratch so that if the list is extended in the
+. future (or one code is split into two) you can still enjoy the
+. benefits of the default list. You can do that with something like
+. this:
+.
+. retrycodes = urlgrabber.grabber.URLGrabberOptions().retrycodes
+. if 12 not in retrycodes:
+. retrycodes.append(12)
+.
+}
+. checkfunc = None
+.
+. a function to do additional checks. This defaults to None, which
+. means no additional checking. The function should simply return
+. on a successful check. It should raise URLGrabError on an
+. unsuccessful check. Raising of any other exception will be
+. considered immediate failure and no retries will occur.
+.
+. If it raises URLGrabError, the error code will determine the retry
+. behavior. Negative error numbers are reserved for use by these
+. passed in functions, so you can use many negative numbers for
+. different types of failure. By default, -1 results in a retry,
+. but this can be customized with retrycodes.
+.
+. If you simply pass in a function, it will be given exactly one
+. argument: a CallbackObject instance with the .url attribute
+. defined and either .filename (for urlgrab) or .data (for urlread).
+. For urlgrab, .filename is the name of the local file. For
+. urlread, .data is the actual string data. If you need other
+. arguments passed to the callback (program state of some sort), you
+. can do so like this:
+.
+. checkfunc=(function, ('arg1', 2), {'kwarg': 3})
+.
+. if the downloaded file has filename /tmp/stuff, then this will
+. result in this call (for urlgrab):
+.
+. function(obj, 'arg1', 2, kwarg=3)
+. # obj.filename = '/tmp/stuff'
+. # obj.url = 'http://foo.com/stuff'
+.
+. NOTE: both the "args" tuple and "kwargs" dict must be present if
+. you use this syntax, but either (or both) can be empty.
+.
+. failure_callback = None
+.
+. The callback that gets called during retries when an attempt to
+. fetch a file fails. The syntax for specifying the callback is
+. identical to checkfunc, except for the attributes defined in the
+[ 1
+. CallbackObject instance. In this case, it will have .exception
+. and .url defined. As you might suspect, .exception is the
+. exception that was raised.
+] 1
+{ 1
+. CallbackObject instance. The attributes for failure_callback are:
+.
+. exception = the raised exception
+. url = the url we're trying to fetch
+. tries = the number of tries so far (including this one)
+. retry = the value of the retry option
+}
+.
+. The callback is present primarily to inform the calling program of
+. the failure, but if it raises an exception (including the one it's
+. passed) that exception will NOT be caught and will therefore cause
+. future retries to be aborted.
+.
+{ 1
+. The callback is called for EVERY failure, including the last one.
+. On the last try, the callback can raise an alternate exception,
+. but it cannot (without severe trickiness) prevent the exception
+. from being raised.
+.
+. interrupt_callback = None
+.
+. This callback is called if KeyboardInterrupt is received at any
+. point in the transfer. Basically, this callback can have three
+. impacts on the fetch process based on the way it exits:
+.
+. 1) raise no exception: the current fetch will be aborted, but
+. any further retries will still take place
+.
+. 2) raise a URLGrabError: if you're using a MirrorGroup, then
+. this will prompt a failover to the next mirror according to
+. the behavior of the MirrorGroup subclass. It is recommended
+. that you raise URLGrabError with code 15, 'user abort'. If
+. you are NOT using a MirrorGroup subclass, then this is the
+. same as (3).
+.
+. 3) raise some other exception (such as KeyboardInterrupt), which
+. will not be caught at either the grabber or mirror levels.
+. That is, it will be raised up all the way to the caller.
+.
+. This callback is very similar to failure_callback. They are
+. passed the same arguments, so you could use the same function for
+. both.
+.
+}
+. BANDWIDTH THROTTLING
+.
+. urlgrabber supports throttling via two values: throttle and
+. bandwidth Between the two, you can either specify and absolute
+. throttle threshold or specify a theshold as a fraction of maximum
+. available bandwidth.
+.
+. throttle is a number - if it's an int, it's the bytes/second
+. throttle limit. If it's a float, it is first multiplied by
+. bandwidth. If throttle == 0, throttling is disabled. If None, the
+. module-level default (which can be set with set_throttle) is used.
+.
+. bandwidth is the nominal max bandwidth in bytes/second. If throttle
+. is a float and bandwidth == 0, throttling is disabled. If None, the
+. module-level default (which can be set with set_bandwidth) is used.
+.
+. THROTTLING EXAMPLES:
+.
+. Lets say you have a 100 Mbps connection. This is (about) 10^8 bits
+. per second, or 12,500,000 Bytes per second. You have a number of
+. throttling options:
+.
+. *) set_bandwidth(12500000); set_throttle(0.5) # throttle is a float
+.
+. This will limit urlgrab to use half of your available bandwidth.
+.
+. *) set_throttle(6250000) # throttle is an int
+.
+. This will also limit urlgrab to use half of your available
+. bandwidth, regardless of what bandwidth is set to.
+.
+. *) set_throttle(6250000); set_throttle(1.0) # float
+.
+. Use half your bandwidth
+.
+. *) set_throttle(6250000); set_throttle(2.0) # float
+.
+. Use up to 12,500,000 Bytes per second (your nominal max bandwidth)
+.
+. *) set_throttle(6250000); set_throttle(0) # throttle = 0
+.
+. Disable throttling - this is more efficient than a very large
+. throttle setting.
+.
+. *) set_throttle(0); set_throttle(1.0) # throttle is float, bandwidth = 0
+.
+. Disable throttling - this is the default when the module is loaded.
+.
+. SUGGESTED AUTHOR IMPLEMENTATION (THROTTLING)
+.
+. While this is flexible, it's not extremely obvious to the user. I
+. suggest you implement a float throttle as a percent to make the
+. distinction between absolute and relative throttling very explicit.
+.
+. Also, you may want to convert the units to something more convenient
+. than bytes/second, such as kbps or kB/s, etc.
+.
+. """
+.
+[ 1
+. # $Id: grabber.py,v 1.39 2005/03/03 00:54:23 mstenner Exp $
+] 1
+{ 1
+. # $Id: grabber.py,v 1.43 2005/10/22 21:57:28 mstenner Exp $
+}
+.
+. import os
+. import os.path
+{ 1
+. import sys
+}
+. import urlparse
+. import rfc822
+. import time
+. import string
+. import urllib
+. import urllib2
+. from stat import * # S_* and ST_*
+.
+{ 1
+. ########################################################################
+. # MODULE INITIALIZATION
+. ########################################################################
+}
+. try:
+. exec('from ' + (__name__.split('.'))[0] + ' import __version__')
+. except:
+. __version__ = '???'
+.
+. auth_handler = urllib2.HTTPBasicAuthHandler( \
+. urllib2.HTTPPasswordMgrWithDefaultRealm())
+.
+[ 1
+. DEBUG=0
+.
+] 1
+. try:
+. from i18n import _
+. except ImportError, msg:
+. def _(st): return st
+.
+. try:
+. from httplib import HTTPException
+. except ImportError, msg:
+. HTTPException = None
+.
+. try:
+. # This is a convenient way to make keepalive optional.
+. # Just rename the module so it can't be imported.
+{ 1
+. import keepalive
+}
+. from keepalive import HTTPHandler
+. except ImportError, msg:
+. keepalive_handler = None
+. else:
+. keepalive_handler = HTTPHandler()
+.
+. try:
+. # add in range support conditionally too
+[ 1
+. from urlgrabber.byterange import HTTPRangeHandler, FileRangeHandler, \
+] 1
+{ 1
+. import byterange
+. from byterange import HTTPRangeHandler, FileRangeHandler, \
+}
+. FTPRangeHandler, range_tuple_normalize, range_tuple_to_header, \
+. RangeError
+. except ImportError, msg:
+. range_handlers = ()
+. RangeError = None
+. have_range = 0
+. else:
+. range_handlers = (HTTPRangeHandler(), FileRangeHandler(), FTPRangeHandler())
+. have_range = 1
+.
+.
+. # check whether socket timeout support is available (Python >= 2.3)
+. import socket
+. try:
+. TimeoutError = socket.timeout
+. have_socket_timeout = True
+. except AttributeError:
+. TimeoutError = None
+. have_socket_timeout = False
+{ 1
+.
+. ########################################################################
+. # functions for debugging output. These functions are here because they
+. # are also part of the module initialization.
+. DEBUG = None
+. def set_logger(DBOBJ):
+. """Set the DEBUG object. This is called by _init_default_logger when
+. the environment variable URLGRABBER_DEBUG is set, but can also be
+. called by a calling program. Basically, if the calling program uses
+. the logging module and would like to incorporate urlgrabber logging,
+. then it can do so this way. It's probably not necessary as most
+. internal logging is only for debugging purposes.
+.
+. The passed-in object should be a logging.Logger instance. It will
+. be pushed into the keepalive and byterange modules if they're
+. being used. The mirror module pulls this object in on import, so
+. you will need to manually push into it. In fact, you may find it
+. tidier to simply push your logging object (or objects) into each
+. of these modules independently.
+. """
+.
+. global DEBUG
+. DEBUG = DBOBJ
+. if keepalive_handler and keepalive.DEBUG is None:
+. keepalive.DEBUG = DBOBJ
+. if have_range and byterange.DEBUG is None:
+. byterange.DEBUG = DBOBJ
+.
+. def _init_default_logger():
+. '''Examines the environment variable URLGRABBER_DEBUG and creates
+. a logging object (logging.logger) based on the contents. It takes
+. the form
+.
+. URLGRABBER_DEBUG=level,filename
+.
+. where "level" can be either an integer or a log level from the
+. logging module (DEBUG, INFO, etc). If the integer is zero or
+. less, logging will be disabled. Filename is the filename where
+. logs will be sent. If it is "-", then stdout will be used. If
+. the filename is empty or missing, stderr will be used. If the
+. variable cannot be processed or the logging module cannot be
+. imported (python < 2.3) then logging will be disabled. Here are
+. some examples:
+.
+. URLGRABBER_DEBUG=1,debug.txt # log everything to debug.txt
+. URLGRABBER_DEBUG=WARNING,- # log warning and higher to stdout
+. URLGRABBER_DEBUG=INFO # log info and higher to stderr
+.
+. This funtion is called during module initialization. It is not
+. intended to be called from outside. The only reason it is a
+. function at all is to keep the module-level namespace tidy and to
+. collect the code into a nice block.'''
+.
+. try:
+. dbinfo = os.environ['URLGRABBER_DEBUG'].split(',')
+. import logging
+. level = logging._levelNames.get(dbinfo[0], int(dbinfo[0]))
+. if level < 1: raise ValueError()
+.
+. formatter = logging.Formatter('%(asctime)s %(message)s')
+. if len(dbinfo) > 1: filename = dbinfo[1]
+. else: filename = ''
+. if filename == '': handler = logging.StreamHandler(sys.stderr)
+. elif filename == '-': handler = logging.StreamHandler(sys.stdout)
+. else: handler = logging.FileHandler(filename)
+. handler.setFormatter(formatter)
+. DBOBJ = logging.getLogger('urlgrabber')
+. DBOBJ.addHandler(handler)
+. DBOBJ.setLevel(level)
+. except (KeyError, ImportError, ValueError):
+. DBOBJ = None
+. set_logger(DBOBJ)
+.
+. _init_default_logger()
+. ########################################################################
+. # END MODULE INITIALIZATION
+. ########################################################################
+.
+.
+}
+.
+. class URLGrabError(IOError):
+. """
+. URLGrabError error codes:
+.
+. URLGrabber error codes (0 -- 255)
+. 0 - everything looks good (you should never see this)
+. 1 - malformed url
+. 2 - local file doesn't exist
+. 3 - request for non-file local file (dir, etc)
+. 4 - IOError on fetch
+. 5 - OSError on fetch
+. 6 - no content length header when we expected one
+. 7 - HTTPException
+. 8 - Exceeded read limit (for urlread)
+. 9 - Requested byte range not satisfiable.
+. 10 - Byte range requested, but range support unavailable
+. 11 - Illegal reget mode
+[ 1
+. 12 - Socket timeout.
+.
+] 1
+{ 1
+. 12 - Socket timeout
+. 13 - malformed proxy url
+. 14 - HTTPError (includes .code and .exception attributes)
+. 15 - user abort
+.
+}
+. MirrorGroup error codes (256 -- 511)
+. 256 - No more mirrors left to try
+.
+. Custom (non-builtin) classes derived from MirrorGroup (512 -- 767)
+. [ this range reserved for application-specific error codes ]
+.
+. Retry codes (< 0)
+. -1 - retry the download, unknown reason
+.
+. Note: to test which group a code is in, you can simply do integer
+. division by 256: e.errno / 256
+.
+. Negative codes are reserved for use by functions passed in to
+. retrygrab with checkfunc. The value -1 is built in as a generic
+. retry code and is already included in the retrycodes list.
+. Therefore, you can create a custom check function that simply
+. returns -1 and the fetch will be re-tried. For more customized
+. retries, you can use other negative number and include them in
+. retry-codes. This is nice for outputting useful messages about
+. what failed.
+.
+. You can use these error codes like so:
+. try: urlgrab(url)
+. except URLGrabError, e:
+. if e.errno == 3: ...
+. # or
+. print e.strerror
+. # or simply
+. print e #### print '[Errno %i] %s' % (e.errno, e.strerror)
+. """
+. pass
+.
+. class CallbackObject:
+. """Container for returned callback data.
+.
+. This is currently a dummy class into which urlgrabber can stuff
+. information for passing to callbacks. This way, the prototype for
+. all callbacks is the same, regardless of the data that will be
+. passed back. Any function that accepts a callback function as an
+. argument SHOULD document what it will define in this object.
+.
+. It is possible that this class will have some greater
+. functionality in the future.
+. """
+[ 1
+. pass
+] 1
+{ 1
+. def __init__(self, **kwargs):
+. self.__dict__.update(kwargs)
+}
+.
+. def close_all():
+. """close any open keepalive connections"""
+. if keepalive_handler: keepalive_handler.close_all()
+.
+. def urlgrab(url, filename=None, **kwargs):
+. """grab the file at and make a local copy at
+. If filename is none, the basename of the url is used.
+. urlgrab returns the filename of the local file, which may be different
+. from the passed-in filename if the copy_local kwarg == 0.
+.
+. See module documentation for a description of possible kwargs.
+. """
+. return default_grabber.urlgrab(url, filename, **kwargs)
+.
+. def urlopen(url, **kwargs):
+. """open the url and return a file object
+. If a progress object or throttle specifications exist, then
+. a special file object will be returned that supports them.
+. The file object can be treated like any other file object.
+.
+. See module documentation for a description of possible kwargs.
+. """
+. return default_grabber.urlopen(url, **kwargs)
+.
+. def urlread(url, limit=None, **kwargs):
+. """read the url into a string, up to 'limit' bytes
+. If the limit is exceeded, an exception will be thrown. Note that urlread
+. is NOT intended to be used as a way of saying "I want the first N bytes"
+. but rather 'read the whole file into memory, but don't use too much'
+.
+. See module documentation for a description of possible kwargs.
+. """
+. return default_grabber.urlread(url, limit, **kwargs)
+.
+.
+. class URLGrabberOptions:
+. """Class to ease kwargs handling."""
+.
+. def __init__(self, delegate=None, **kwargs):
+. """Initialize URLGrabberOptions object.
+. Set default values for all options and then update options specified
+. in kwargs.
+. """
+. self.delegate = delegate
+. if delegate is None:
+. self._set_defaults()
+. self._set_attributes(**kwargs)
+.
+. def __getattr__(self, name):
+. if self.delegate and hasattr(self.delegate, name):
+. return getattr(self.delegate, name)
+. raise AttributeError, name
+.
+. def raw_throttle(self):
+. """Calculate raw throttle value from throttle and bandwidth
+. values.
+. """
+. if self.throttle <= 0:
+. return 0
+. elif type(self.throttle) == type(0):
+. return float(self.throttle)
+. else: # throttle is a float
+. return self.bandwidth * self.throttle
+.
+. def derive(self, **kwargs):
+. """Create a derived URLGrabberOptions instance.
+. This method creates a new instance and overrides the
+. options specified in kwargs.
+. """
+. return URLGrabberOptions(delegate=self, **kwargs)
+.
+. def _set_attributes(self, **kwargs):
+. """Update object attributes with those provided in kwargs."""
+. self.__dict__.update(kwargs)
+. if have_range and kwargs.has_key('range'):
+. # normalize the supplied range value
+. self.range = range_tuple_normalize(self.range)
+. if not self.reget in [None, 'simple', 'check_timestamp']:
+. raise URLGrabError(11, _('Illegal reget mode: %s') \
+. % (self.reget, ))
+.
+. def _set_defaults(self):
+. """Set all options to their default values.
+. When adding new options, make sure a default is
+. provided here.
+. """
+. self.progress_obj = None
+. self.throttle = 1.0
+. self.bandwidth = 0
+. self.retry = None
+. self.retrycodes = [-1,2,4,5,6,7]
+. self.checkfunc = None
+. self.copy_local = 0
+. self.close_connection = 0
+. self.range = None
+. self.user_agent = 'urlgrabber/%s' % __version__
+. self.keepalive = 1
+. self.proxies = None
+. self.reget = None
+. self.failure_callback = None
+{ 1
+. self.interrupt_callback = None
+}
+. self.prefix = None
+. self.opener = None
+. self.cache_openers = True
+. self.timeout = None
+. self.text = None
+. self.http_headers = None
+. self.ftp_headers = None
+{ 1
+. self.data = None
+}
+.
+. class URLGrabber:
+. """Provides easy opening of URLs with a variety of options.
+.
+. All options are specified as kwargs. Options may be specified when
+. the class is created and may be overridden on a per request basis.
+.
+. New objects inherit default values from default_grabber.
+. """
+.
+. def __init__(self, **kwargs):
+. self.opts = URLGrabberOptions(**kwargs)
+.
+. def _retry(self, opts, func, *args):
+. tries = 0
+. while 1:
+{ 1
+. # there are only two ways out of this loop. The second has
+. # several "sub-ways"
+. # 1) via the return in the "try" block
+. # 2) by some exception being raised
+. # a) an excepton is raised that we don't "except"
+. # b) a callback raises ANY exception
+. # c) we're not retry-ing or have run out of retries
+. # d) the URLGrabError code is not in retrycodes
+. # beware of infinite loops :)
+}
+. tries = tries + 1
+{ 1
+. exception = None
+. retrycode = None
+. callback = None
+. if DEBUG: DEBUG.info('attempt %i/%s: %s',
+. tries, opts.retry, args[0])
+}
+. try:
+[ 1
+. return apply(func, (opts,) + args, {})
+] 1
+{ 1
+. r = apply(func, (opts,) + args, {})
+. if DEBUG: DEBUG.info('success')
+. return r
+}
+. except URLGrabError, e:
+[ 1
+. if DEBUG: print 'EXCEPTION: %s' % e
+. if (opts.retry is None) \
+. or (tries == opts.retry) \
+. or (e.errno not in opts.retrycodes): raise
+. if opts.failure_callback:
+. cb_func, cb_args, cb_kwargs = \
+. self._make_callback(opts.failure_callback)
+. # this is a little icky - for now, the first element
+. # of args is the url. we might consider a way to tidy
+. # that up, though
+. obj = CallbackObject()
+. obj.exception = e
+. obj.url = args[0]
+. cb_func(obj, *cb_args, **cb_kwargs)
+] 1
+{ 1
+. exception = e
+. callback = opts.failure_callback
+. retrycode = e.errno
+. except KeyboardInterrupt, e:
+. exception = e
+. callback = opts.interrupt_callback
+.
+. if DEBUG: DEBUG.info('exception: %s', exception)
+. if callback:
+. if DEBUG: DEBUG.info('calling callback: %s', callback)
+. cb_func, cb_args, cb_kwargs = self._make_callback(callback)
+. obj = CallbackObject(exception=exception, url=args[0],
+. tries=tries, retry=opts.retry)
+. cb_func(obj, *cb_args, **cb_kwargs)
+.
+. if (opts.retry is None) or (tries == opts.retry):
+. if DEBUG: DEBUG.info('retries exceeded, re-raising')
+. raise
+.
+. if (retrycode is not None) and (retrycode not in opts.retrycodes):
+. if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising',
+. retrycode, opts.retrycodes)
+. raise
+}
+.
+. def urlopen(self, url, **kwargs):
+. """open the url and return a file object
+. If a progress object or throttle value specified when this
+. object was created, then a special file object will be
+. returned that supports them. The file object can be treated
+. like any other file object.
+. """
+. opts = self.opts.derive(**kwargs)
+. (url,parts) = self._parse_url(url)
+. def retryfunc(opts, url):
+. return URLGrabberFileObject(url, filename=None, opts=opts)
+. return self._retry(opts, retryfunc, url)
+.
+. def urlgrab(self, url, filename=None, **kwargs):
+. """grab the file at and make a local copy at
+. If filename is none, the basename of the url is used.
+. urlgrab returns the filename of the local file, which may be
+. different from the passed-in filename if copy_local == 0.
+. """
+. opts = self.opts.derive(**kwargs)
+. (url, parts) = self._parse_url(url)
+. (scheme, host, path, parm, query, frag) = parts
+. if filename is None:
+. if scheme in [ 'http', 'https' ]:
+. filename = os.path.basename( urllib.unquote(path) )
+. else:
+. filename = os.path.basename( path )
+. if scheme == 'file' and not opts.copy_local:
+. # just return the name of the local file - don't make a
+. # copy currently
+. if not os.path.exists(path):
+. raise URLGrabError(2,
+. _('Local file does not exist: %s') % (path, ))
+. elif not os.path.isfile(path):
+. raise URLGrabError(3,
+. _('Not a normal file: %s') % (path, ))
+. elif not opts.range:
+. return path
+.
+. def retryfunc(opts, url, filename):
+. fo = URLGrabberFileObject(url, filename, opts)
+. try:
+. fo._do_grab()
+. if not opts.checkfunc is None:
+. cb_func, cb_args, cb_kwargs = \
+. self._make_callback(opts.checkfunc)
+. obj = CallbackObject()
+. obj.filename = filename
+. obj.url = url
+. apply(cb_func, (obj, )+cb_args, cb_kwargs)
+. finally:
+. fo.close()
+. return filename
+.
+. return self._retry(opts, retryfunc, url, filename)
+.
+. def urlread(self, url, limit=None, **kwargs):
+. """read the url into a string, up to 'limit' bytes
+. If the limit is exceeded, an exception will be thrown. Note
+. that urlread is NOT intended to be used as a way of saying
+. "I want the first N bytes" but rather 'read the whole file
+. into memory, but don't use too much'
+. """
+. opts = self.opts.derive(**kwargs)
+. (url, parts) = self._parse_url(url)
+. if limit is not None:
+. limit = limit + 1
+.
+. def retryfunc(opts, url, limit):
+. fo = URLGrabberFileObject(url, filename=None, opts=opts)
+. s = ''
+. try:
+. # this is an unfortunate thing. Some file-like objects
+. # have a default "limit" of None, while the built-in (real)
+. # file objects have -1. They each break the other, so for
+. # now, we just force the default if necessary.
+. if limit is None: s = fo.read()
+. else: s = fo.read(limit)
+.
+. if not opts.checkfunc is None:
+. cb_func, cb_args, cb_kwargs = \
+. self._make_callback(opts.checkfunc)
+. obj = CallbackObject()
+. obj.data = s
+. obj.url = url
+. apply(cb_func, (obj, )+cb_args, cb_kwargs)
+. finally:
+. fo.close()
+. return s
+.
+. s = self._retry(opts, retryfunc, url, limit)
+. if limit and len(s) > limit:
+. raise URLGrabError(8,
+. _('Exceeded limit (%i): %s') % (limit, url))
+. return s
+.
+. def _parse_url(self,url):
+. """break up the url into its component parts
+.
+. This function disassembles a url and
+. 1) "normalizes" it, tidying it up a bit
+. 2) does any authentication stuff it needs to do
+.
+. it returns the (cleaned) url and a tuple of component parts
+. """
+. if self.opts.prefix:
+. p = self.opts.prefix
+. if p[-1] == '/' or url[0] == '/': url = p + url
+. else: url = p + '/' + url
+.
+. (scheme, host, path, parm, query, frag) = \
+. urlparse.urlparse(url)
+. if not scheme:
+. if not url[0] == '/': url = os.path.abspath(url)
+. url = 'file:' + url
+. (scheme, host, path, parm, query, frag) = \
+. urlparse.urlparse(url)
+. path = os.path.normpath(path)
+. if scheme in ['http', 'https']: path = urllib.quote(path)
+. if '@' in host and auth_handler and scheme in ['http', 'https']:
+. try:
+. user_pass, host = host.split('@', 1)
+. if ':' in user_pass: user, password = user_pass.split(':', 1)
+. except ValueError, e:
+. raise URLGrabError(1, _('Bad URL: %s') % url)
+[ 1
+. if DEBUG: print 'adding HTTP auth: %s, %s' % (user, password)
+] 1
+{ 1
+. if DEBUG: DEBUG.info('adding HTTP auth: %s, %s', user, password)
+}
+. auth_handler.add_password(None, host, user, password)
+. parts = (scheme, host, path, parm, query, frag)
+. url = urlparse.urlunparse(parts)
+. return url, parts
+.
+. def _make_callback(self, callback_obj):
+. if callable(callback_obj):
+. return callback_obj, (), {}
+. else:
+. return callback_obj
+.
+. # create the default URLGrabber used by urlXXX functions.
+. # NOTE: actual defaults are set in URLGrabberOptions
+. default_grabber = URLGrabber()
+.
+. class URLGrabberFileObject:
+. """This is a file-object wrapper that supports progress objects
+. and throttling.
+.
+. This exists to solve the following problem: lets say you want to
+. drop-in replace a normal open with urlopen. You want to use a
+. progress meter and/or throttling, but how do you do that without
+. rewriting your code? Answer: urlopen will return a wrapped file
+. object that does the progress meter and-or throttling internally.
+. """
+.
+. def __init__(self, url, filename, opts):
+. self.url = url
+. self.filename = filename
+. self.opts = opts
+. self.fo = None
+. self._rbuf = ''
+. self._rbufsize = 1024*8
+. self._ttime = time.time()
+. self._tsize = 0
+. self._amount_read = 0
+. self._opener = None
+. self._do_open()
+.
+. def __getattr__(self, name):
+. """This effectively allows us to wrap at the instance level.
+. Any attribute not found in _this_ object will be searched for
+. in self.fo. This includes methods."""
+. if hasattr(self.fo, name):
+. return getattr(self.fo, name)
+. raise AttributeError, name
+.
+. def _get_opener(self):
+. """Build a urllib2 OpenerDirector based on request options."""
+. if self.opts.opener:
+. return self.opts.opener
+. elif self._opener is None:
+. handlers = []
+. need_keepalive_handler = (keepalive_handler and self.opts.keepalive)
+. need_range_handler = (range_handlers and \
+. (self.opts.range or self.opts.reget))
+. # if you specify a ProxyHandler when creating the opener
+. # it _must_ come before all other handlers in the list or urllib2
+. # chokes.
+. if self.opts.proxies:
+. handlers.append( CachedProxyHandler(self.opts.proxies) )
+.
+. # -------------------------------------------------------
+. # OK, these next few lines are a serious kludge to get
+. # around what I think is a bug in python 2.2's
+. # urllib2. The basic idea is that default handlers
+. # get applied first. If you override one (like a
+. # proxy handler), then the default gets pulled, but
+. # the replacement goes on the end. In the case of
+. # proxies, this means the normal handler picks it up
+. # first and the proxy isn't used. Now, this probably
+. # only happened with ftp or non-keepalive http, so not
+. # many folks saw it. The simple approach to fixing it
+. # is just to make sure you override the other
+. # conflicting defaults as well. I would LOVE to see
+. # these go way or be dealt with more elegantly. The
+. # problem isn't there after 2.2. -MDS 2005/02/24
+. if not need_keepalive_handler:
+. handlers.append( urllib2.HTTPHandler() )
+. if not need_range_handler:
+. handlers.append( urllib2.FTPHandler() )
+. # -------------------------------------------------------
+.
+. if need_keepalive_handler:
+. handlers.append( keepalive_handler )
+. if need_range_handler:
+. handlers.extend( range_handlers )
+. handlers.append( auth_handler )
+. if self.opts.cache_openers:
+. self._opener = CachedOpenerDirector(*handlers)
+. else:
+. self._opener = urllib2.build_opener(*handlers)
+. # OK, I don't like to do this, but otherwise, we end up with
+. # TWO user-agent headers.
+. self._opener.addheaders = []
+. return self._opener
+.
+. def _do_open(self):
+. opener = self._get_opener()
+.
+[ 1
+. req = urllib2.Request(self.url) # build request object
+] 1
+{ 1
+. req = urllib2.Request(self.url, self.opts.data) # build request object
+}
+. self._add_headers(req) # add misc headers that we need
+. self._build_range(req) # take care of reget and byterange stuff
+.
+. fo, hdr = self._make_request(req, opener)
+. if self.reget_time and self.opts.reget == 'check_timestamp':
+. # do this if we have a local file with known timestamp AND
+. # we're in check_timestamp reget mode.
+. fetch_again = 0
+. try:
+. modified_tuple = hdr.getdate_tz('last-modified')
+. modified_stamp = rfc822.mktime_tz(modified_tuple)
+. if modified_stamp > self.reget_time: fetch_again = 1
+. except (TypeError,):
+. fetch_again = 1
+.
+. if fetch_again:
+. # the server version is newer than the (incomplete) local
+. # version, so we should abandon the version we're getting
+. # and fetch the whole thing again.
+. fo.close()
+. self.opts.reget = None
+. del req.headers['Range']
+. self._build_range(req)
+. fo, hdr = self._make_request(req, opener)
+.
+. (scheme, host, path, parm, query, frag) = urlparse.urlparse(self.url)
+. if not (self.opts.progress_obj or self.opts.raw_throttle() \
+. or self.opts.timeout):
+. # if we're not using the progress_obj, throttling, or timeout
+. # we can get a performance boost by going directly to
+. # the underlying fileobject for reads.
+. self.read = fo.read
+. if hasattr(fo, 'readline'):
+. self.readline = fo.readline
+. elif self.opts.progress_obj:
+. try: length = int(hdr['Content-Length'])
+. except: length = None
+. self.opts.progress_obj.start(str(self.filename), self.url,
+. os.path.basename(path),
+. length,
+. text=self.opts.text)
+. self.opts.progress_obj.update(0)
+. (self.fo, self.hdr) = (fo, hdr)
+.
+. def _add_headers(self, req):
+. if self.opts.user_agent:
+. req.add_header('User-agent', self.opts.user_agent)
+. try: req_type = req.get_type()
+. except ValueError: req_type = None
+. if self.opts.http_headers and req_type in ('http', 'https'):
+. for h, v in self.opts.http_headers:
+. req.add_header(h, v)
+. if self.opts.ftp_headers and req_type == 'ftp':
+. for h, v in self.opts.ftp_headers:
+. req.add_header(h, v)
+.
+. def _build_range(self, req):
+. self.reget_time = None
+. self.append = 0
+. reget_length = 0
+. rt = None
+. if have_range and self.opts.reget and type(self.filename) == type(''):
+. # we have reget turned on and we're dumping to a file
+. try:
+. s = os.stat(self.filename)
+. except OSError:
+. pass
+. else:
+. self.reget_time = s[ST_MTIME]
+. reget_length = s[ST_SIZE]
+. rt = reget_length, ''
+. self.append = 1
+.
+. if self.opts.range:
+. if not have_range:
+. raise URLGrabError(10, _('Byte range requested but range '\
+. 'support unavailable'))
+. rt = self.opts.range
+. if rt[0]: rt = (rt[0] + reget_length, rt[1])
+.
+. if rt:
+. header = range_tuple_to_header(rt)
+. if header: req.add_header('Range', header)
+.
+. def _make_request(self, req, opener):
+. try:
+. if have_socket_timeout and self.opts.timeout:
+. old_to = socket.getdefaulttimeout()
+. socket.setdefaulttimeout(self.opts.timeout)
+. try:
+. fo = opener.open(req)
+. finally:
+. socket.setdefaulttimeout(old_to)
+. else:
+. fo = opener.open(req)
+. hdr = fo.info()
+. except ValueError, e:
+. raise URLGrabError(1, _('Bad URL: %s') % (e, ))
+. except RangeError, e:
+[ 1
+. raise URLGrabError(9, _('%s') % (e, ))
+] 1
+{ 1
+. raise URLGrabError(9, str(e))
+. except urllib2.HTTPError, e:
+. new_e = URLGrabError(14, str(e))
+. new_e.code = e.code
+. new_e.exception = e
+. raise new_e
+}
+. except IOError, e:
+. if hasattr(e, 'reason') and have_socket_timeout and \
+. isinstance(e.reason, TimeoutError):
+. raise URLGrabError(12, _('Timeout: %s') % (e, ))
+. else:
+. raise URLGrabError(4, _('IOError: %s') % (e, ))
+. except OSError, e:
+. raise URLGrabError(5, _('OSError: %s') % (e, ))
+. except HTTPException, e:
+[ 1
+. raise URLGrabError(7, _('HTTP Error (%s): %s') % \
+] 1
+{ 1
+. raise URLGrabError(7, _('HTTP Exception (%s): %s') % \
+}
+. (e.__class__.__name__, e))
+. else:
+. return (fo, hdr)
+.
+. def _do_grab(self):
+. """dump the file to self.filename."""
+. if self.append: new_fo = open(self.filename, 'ab')
+. else: new_fo = open(self.filename, 'wb')
+. bs = 1024*8
+. size = 0
+.
+. block = self.read(bs)
+. size = size + len(block)
+. while block:
+. new_fo.write(block)
+. block = self.read(bs)
+. size = size + len(block)
+.
+. new_fo.close()
+. try:
+. modified_tuple = self.hdr.getdate_tz('last-modified')
+. modified_stamp = rfc822.mktime_tz(modified_tuple)
+. os.utime(self.filename, (modified_stamp, modified_stamp))
+. except (TypeError,), e: pass
+.
+. return size
+.
+. def _fill_buffer(self, amt=None):
+. """fill the buffer to contain at least 'amt' bytes by reading
+. from the underlying file object. If amt is None, then it will
+. read until it gets nothing more. It updates the progress meter
+. and throttles after every self._rbufsize bytes."""
+. # the _rbuf test is only in this first 'if' for speed. It's not
+. # logically necessary
+. if self._rbuf and not amt is None:
+. L = len(self._rbuf)
+. if amt > L:
+. amt = amt - L
+. else:
+. return
+.
+. # if we've made it here, then we don't have enough in the buffer
+. # and we need to read more.
+.
+. buf = [self._rbuf]
+. bufsize = len(self._rbuf)
+. while amt is None or amt:
+. # first, delay if necessary for throttling reasons
+. if self.opts.raw_throttle():
+. diff = self._tsize/self.opts.raw_throttle() - \
+. (time.time() - self._ttime)
+. if diff > 0: time.sleep(diff)
+. self._ttime = time.time()
+.
+. # now read some data, up to self._rbufsize
+. if amt is None: readamount = self._rbufsize
+. else: readamount = min(amt, self._rbufsize)
+. try:
+. new = self.fo.read(readamount)
+. except socket.error, e:
+. raise URLGrabError(4, _('Socket Error: %s') % (e, ))
+. except TimeoutError, e:
+. raise URLGrabError(12, _('Timeout: %s') % (e, ))
+. newsize = len(new)
+. if not newsize: break # no more to read
+.
+. if amt: amt = amt - newsize
+. buf.append(new)
+. bufsize = bufsize + newsize
+. self._tsize = newsize
+. self._amount_read = self._amount_read + newsize
+. if self.opts.progress_obj:
+. self.opts.progress_obj.update(self._amount_read)
+.
+. self._rbuf = string.join(buf, '')
+. return
+.
+. def read(self, amt=None):
+. self._fill_buffer(amt)
+. if amt is None:
+. s, self._rbuf = self._rbuf, ''
+. else:
+. s, self._rbuf = self._rbuf[:amt], self._rbuf[amt:]
+. return s
+.
+. def readline(self, limit=-1):
+. i = string.find(self._rbuf, '\n')
+. while i < 0 and not (0 < limit <= len(self._rbuf)):
+. L = len(self._rbuf)
+. self._fill_buffer(L + self._rbufsize)
+. if not len(self._rbuf) > L: break
+. i = string.find(self._rbuf, '\n', L)
+.
+. if i < 0: i = len(self._rbuf)
+. else: i = i+1
+. if 0 <= limit < len(self._rbuf): i = limit
+.
+. s, self._rbuf = self._rbuf[:i], self._rbuf[i:]
+. return s
+.
+. def close(self):
+. if self.opts.progress_obj:
+. self.opts.progress_obj.end(self._amount_read)
+. self.fo.close()
+. if self.opts.close_connection:
+. try: self.fo.close_connection()
+. except: pass
+.
+. _handler_cache = []
+. def CachedOpenerDirector(*handlers):
+. for (cached_handlers, opener) in _handler_cache:
+. if cached_handlers == handlers:
+. for handler in opener.handlers:
+. handler.add_parent(opener)
+. return opener
+. opener = urllib2.build_opener(*handlers)
+. _handler_cache.append( (handlers, opener) )
+. return opener
+.
+. _proxy_cache = []
+. def CachedProxyHandler(proxies):
+. for (pdict, handler) in _proxy_cache:
+. if pdict == proxies:
+{ 1
+. if DEBUG: DEBUG.debug('re-using proxy settings: %s', proxies)
+}
+. break
+. else:
+{ 1
+. for k, v in proxies.items():
+. utype, url = urllib.splittype(v)
+. host, other = urllib.splithost(url)
+. if (utype is None) or (host is None):
+. raise URLGrabError(13, _('Bad proxy URL: %s') % v)
+.
+. if DEBUG: DEBUG.info('creating new proxy handler: %s', proxies)
+}
+. handler = urllib2.ProxyHandler(proxies)
+. _proxy_cache.append( (proxies, handler) )
+. return handler
+.
+. #####################################################################
+. # DEPRECATED FUNCTIONS
+. def set_throttle(new_throttle):
+. """Deprecated. Use: default_grabber.throttle = new_throttle"""
+. default_grabber.throttle = new_throttle
+.
+. def set_bandwidth(new_bandwidth):
+. """Deprecated. Use: default_grabber.bandwidth = new_bandwidth"""
+. default_grabber.bandwidth = new_bandwidth
+.
+. def set_progress_obj(new_progress_obj):
+. """Deprecated. Use: default_grabber.progress_obj = new_progress_obj"""
+. default_grabber.progress_obj = new_progress_obj
+.
+. def set_user_agent(new_user_agent):
+. """Deprecated. Use: default_grabber.user_agent = new_user_agent"""
+. default_grabber.user_agent = new_user_agent
+.
+. def retrygrab(url, filename=None, copy_local=0, close_connection=0,
+. progress_obj=None, throttle=None, bandwidth=None,
+. numtries=3, retrycodes=[-1,2,4,5,6,7], checkfunc=None):
+. """Deprecated. Use: urlgrab() with the retry arg instead"""
+. kwargs = {'copy_local' : copy_local,
+. 'close_connection' : close_connection,
+. 'progress_obj' : progress_obj,
+. 'throttle' : throttle,
+. 'bandwidth' : bandwidth,
+. 'retry' : numtries,
+. 'retrycodes' : retrycodes,
+. 'checkfunc' : checkfunc
+. }
+. return urlgrab(url, filename, **kwargs)
+.
+.
+. #####################################################################
+. # TESTING
+. def _main_test():
+. import sys
+. try: url, filename = sys.argv[1:3]
+. except ValueError:
+. print 'usage:', sys.argv[0], \
+. ' [copy_local=0|1] [close_connection=0|1]'
+. sys.exit()
+.
+. kwargs = {}
+. for a in sys.argv[3:]:
+. k, v = string.split(a, '=', 1)
+. kwargs[k] = int(v)
+.
+. set_throttle(1.0)
+. set_bandwidth(32 * 1024)
+. print "throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle,
+. default_grabber.bandwidth)
+.
+. try: from progress import text_progress_meter
+. except ImportError, e: pass
+. else: kwargs['progress_obj'] = text_progress_meter()
+.
+. try: name = apply(urlgrab, (url, filename), kwargs)
+. except URLGrabError, e: print e
+. else: print 'LOCAL FILE:', name
+.
+.
+. def _retry_test():
+. import sys
+. try: url, filename = sys.argv[1:3]
+. except ValueError:
+. print 'usage:', sys.argv[0], \
+. ' [copy_local=0|1] [close_connection=0|1]'
+. sys.exit()
+.
+. kwargs = {}
+. for a in sys.argv[3:]:
+. k, v = string.split(a, '=', 1)
+. kwargs[k] = int(v)
+.
+. try: from progress import text_progress_meter
+. except ImportError, e: pass
+. else: kwargs['progress_obj'] = text_progress_meter()
+.
+[ 1
+. global DEBUG
+. #DEBUG = 1
+] 1
+. def cfunc(filename, hello, there='foo'):
+. print hello, there
+. import random
+. rnum = random.random()
+. if rnum < .5:
+. print 'forcing retry'
+. raise URLGrabError(-1, 'forcing retry')
+. if rnum < .75:
+. print 'forcing failure'
+. raise URLGrabError(-2, 'forcing immediate failure')
+. print 'success'
+. return
+.
+. close_all()
+. kwargs['checkfunc'] = (cfunc, ('hello',), {'there':'there'})
+. try: name = apply(retrygrab, (url, filename), kwargs)
+. except URLGrabError, e: print e
+. else: print 'LOCAL FILE:', name
+.
+. def _file_object_test(filename=None):
+. import random, cStringIO, sys
+. if filename is None:
+. filename = __file__
+. print 'using file "%s" for comparisons' % filename
+. fo = open(filename)
+. s_input = fo.read()
+. fo.close()
+.
+. for testfunc in [_test_file_object_smallread,
+. _test_file_object_readall,
+. _test_file_object_readline,
+. _test_file_object_readlines]:
+. fo_input = cStringIO.StringIO(s_input)
+. fo_output = cStringIO.StringIO()
+. wrapper = URLGrabberFileObject(fo_input, None, 0)
+. print 'testing %-30s ' % testfunc.__name__,
+. testfunc(wrapper, fo_output)
+. s_output = fo_output.getvalue()
+. if s_output == s_input: print 'passed'
+. else: print 'FAILED'
+.
+. def _test_file_object_smallread(wrapper, fo_output):
+. while 1:
+. s = wrapper.read(23)
+. fo_output.write(s)
+. if not s: return
+.
+. def _test_file_object_readall(wrapper, fo_output):
+. s = wrapper.read()
+. fo_output.write(s)
+.
+. def _test_file_object_readline(wrapper, fo_output):
+. while 1:
+. s = wrapper.readline()
+. fo_output.write(s)
+. if not s: return
+.
+. def _test_file_object_readlines(wrapper, fo_output):
+. li = wrapper.readlines()
+. fo_output.write(string.join(li, ''))
+.
+. if __name__ == '__main__':
+. _main_test()
+. _retry_test()
+. _file_object_test('test')
+[ 1
+.
+}
+] 1
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/eb/keepalive.py-20051228065045-e976cfd1ff7ea4e0.weave
+++ urlgrabber-2.9.7/.bzr/weaves/eb/keepalive.py-20051228065045-e976cfd1ff7ea4e0.weave
@@ -0,0 +1,661 @@
+# bzr weave file v5
+i
+1 fa3a15cc80680a47a3620078cac38125942edfa7
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+i 0
+1 49cfa7ab1308d55d1b8bc36f96854b3109ab4e42
+n wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
+
+w
+{ 0
+. # This library is free software; you can redistribute it and/or
+. # modify it under the terms of the GNU Lesser General Public
+. # License as published by the Free Software Foundation; either
+. # version 2.1 of the License, or (at your option) any later version.
+. #
+. # This library is distributed in the hope that it will be useful,
+. # but WITHOUT ANY WARRANTY; without even the implied warranty of
+. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+. # Lesser General Public License for more details.
+. #
+. # You should have received a copy of the GNU Lesser General Public
+. # License along with this library; if not, write to the
+. # Free Software Foundation, Inc.,
+. # 59 Temple Place, Suite 330,
+. # Boston, MA 02111-1307 USA
+.
+. # This file is part of urlgrabber, a high-level cross-protocol url-grabber
+. # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+.
+. """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
+.
+. >>> import urllib2
+. >>> from keepalive import HTTPHandler
+. >>> keepalive_handler = HTTPHandler()
+. >>> opener = urllib2.build_opener(keepalive_handler)
+. >>> urllib2.install_opener(opener)
+. >>>
+. >>> fo = urllib2.urlopen('http://www.python.org')
+.
+. If a connection to a given host is requested, and all of the existing
+. connections are still in use, another connection will be opened. If
+. the handler tries to use an existing connection but it fails in some
+. way, it will be closed and removed from the pool.
+.
+. To remove the handler, simply re-run build_opener with no arguments, and
+. install that opener.
+.
+. You can explicitly close connections by using the close_connection()
+. method of the returned file-like object (described below) or you can
+. use the handler methods:
+.
+. close_connection(host)
+. close_all()
+. open_connections()
+.
+. NOTE: using the close_connection and close_all methods of the handler
+. should be done with care when using multiple threads.
+. * there is nothing that prevents another thread from creating new
+. connections immediately after connections are closed
+. * no checks are done to prevent in-use connections from being closed
+.
+. >>> keepalive_handler.close_all()
+.
+. EXTRA ATTRIBUTES AND METHODS
+.
+. Upon a status of 200, the object returned has a few additional
+. attributes and methods, which should not be used if you want to
+. remain consistent with the normal urllib2-returned objects:
+.
+. close_connection() - close the connection to the host
+. readlines() - you know, readlines()
+. status - the return status (ie 404)
+. reason - english translation of status (ie 'File not found')
+.
+. If you want the best of both worlds, use this inside an
+. AttributeError-catching try:
+.
+. >>> try: status = fo.status
+. >>> except AttributeError: status = None
+.
+. Unfortunately, these are ONLY there if status == 200, so it's not
+. easy to distinguish between non-200 responses. The reason is that
+. urllib2 tries to do clever things with error codes 301, 302, 401,
+. and 407, and it wraps the object upon return.
+.
+. For python versions earlier than 2.4, you can avoid this fancy error
+. handling by setting the module-level global HANDLE_ERRORS to zero.
+. You see, prior to 2.4, it's the HTTP Handler's job to determine what
+. to handle specially, and what to just pass up. HANDLE_ERRORS == 0
+. means "pass everything up". In python 2.4, however, this job no
+. longer belongs to the HTTP Handler and is now done by a NEW handler,
+. HTTPErrorProcessor. Here's the bottom line:
+.
+. python version < 2.4
+. HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
+. errors
+. HANDLE_ERRORS == 0 pass everything up, error processing is
+. left to the calling code
+. python version >= 2.4
+. HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
+. HANDLE_ERRORS == 0 (default) pass everything up, let the
+. other handlers (specifically,
+. HTTPErrorProcessor) decide what to do
+.
+. In practice, setting the variable either way makes little difference
+. in python 2.4, so for the most consistent behavior across versions,
+. you probably just want to use the defaults, which will give you
+. exceptions on errors.
+.
+. """
+.
+[ 1
+. # $Id: keepalive.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
+] 1
+{ 1
+. # $Id: keepalive.py,v 1.13 2005/10/22 21:57:28 mstenner Exp $
+}
+.
+. import urllib2
+. import httplib
+. import socket
+. import thread
+.
+[ 1
+. DEBUG = 0
+. def DBPRINT(*args): print ' '.join(args)
+] 1
+{ 1
+. DEBUG = None
+}
+.
+. import sys
+[ 1
+. _python_version = map(int, sys.version.split()[0].split('.'))
+. if _python_version < [2, 4]: HANDLE_ERRORS = 1
+] 1
+{ 1
+. if sys.version_info < (2, 4): HANDLE_ERRORS = 1
+}
+. else: HANDLE_ERRORS = 0
+.
+. class ConnectionManager:
+. """
+. The connection manager must be able to:
+. * keep track of all existing
+. """
+. def __init__(self):
+. self._lock = thread.allocate_lock()
+. self._hostmap = {} # map hosts to a list of connections
+. self._connmap = {} # map connections to host
+. self._readymap = {} # map connection to ready state
+.
+. def add(self, host, connection, ready):
+. self._lock.acquire()
+. try:
+. if not self._hostmap.has_key(host): self._hostmap[host] = []
+. self._hostmap[host].append(connection)
+. self._connmap[connection] = host
+. self._readymap[connection] = ready
+. finally:
+. self._lock.release()
+.
+. def remove(self, connection):
+. self._lock.acquire()
+. try:
+. try:
+. host = self._connmap[connection]
+. except KeyError:
+. pass
+. else:
+. del self._connmap[connection]
+. del self._readymap[connection]
+. self._hostmap[host].remove(connection)
+. if not self._hostmap[host]: del self._hostmap[host]
+. finally:
+. self._lock.release()
+.
+. def set_ready(self, connection, ready):
+. try: self._readymap[connection] = ready
+. except KeyError: pass
+.
+. def get_ready_conn(self, host):
+. conn = None
+. self._lock.acquire()
+. try:
+. if self._hostmap.has_key(host):
+. for c in self._hostmap[host]:
+. if self._readymap[c]:
+. self._readymap[c] = 0
+. conn = c
+. break
+. finally:
+. self._lock.release()
+. return conn
+.
+. def get_all(self, host=None):
+. if host:
+. return list(self._hostmap.get(host, []))
+. else:
+. return dict(self._hostmap)
+.
+. class HTTPHandler(urllib2.HTTPHandler):
+. def __init__(self):
+. self._cm = ConnectionManager()
+.
+. #### Connection Management
+. def open_connections(self):
+. """return a list of connected hosts and the number of connections
+. to each. [('foo.com:80', 2), ('bar.org', 1)]"""
+. return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
+.
+. def close_connection(self, host):
+. """close connection(s) to
+. host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
+. no error occurs if there is no connection to that host."""
+. for h in self._cm.get_all(host):
+. self._cm.remove(h)
+. h.close()
+.
+. def close_all(self):
+. """close all open connections"""
+. for host, conns in self._cm.get_all().items():
+. for h in conns:
+. self._cm.remove(h)
+. h.close()
+.
+. def _request_closed(self, request, host, connection):
+. """tells us that this request is now closed and the the
+. connection is ready for another request"""
+. self._cm.set_ready(connection, 1)
+.
+. def _remove_connection(self, host, connection, close=0):
+. if close: connection.close()
+. self._cm.remove(connection)
+.
+. #### Transaction Execution
+. def http_open(self, req):
+. return self.do_open(HTTPConnection, req)
+.
+. def do_open(self, http_class, req):
+. host = req.get_host()
+. if not host:
+. raise urllib2.URLError('no host given')
+.
+. try:
+. h = self._cm.get_ready_conn(host)
+. while h:
+. r = self._reuse_connection(h, req, host)
+.
+. # if this response is non-None, then it worked and we're
+. # done. Break out, skipping the else block.
+. if r: break
+.
+. # connection is bad - possibly closed by server
+. # discard it and ask for the next free connection
+. h.close()
+. self._cm.remove(h)
+. h = self._cm.get_ready_conn(host)
+. else:
+. # no (working) free connections were found. Create a new one.
+. h = http_class(host)
+[ 1
+. if DEBUG: DBPRINT("creating new connection to %s (%d)" % \
+. (host, id(h)))
+] 1
+{ 1
+. if DEBUG: DEBUG.info("creating new connection to %s (%d)",
+. host, id(h))
+}
+. self._cm.add(host, h, 0)
+. self._start_transaction(h, req)
+. r = h.getresponse()
+. except (socket.error, httplib.HTTPException), err:
+. raise urllib2.URLError(err)
+.
+. # if not a persistent connection, don't try to reuse it
+. if r.will_close: self._cm.remove(h)
+.
+[ 1
+. if DEBUG: DBPRINT("STATUS: %s, %s" % (r.status, r.reason))
+] 1
+{ 1
+. if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
+}
+. r._handler = self
+. r._host = host
+. r._url = req.get_full_url()
+. r._connection = h
+. r.code = r.status
+.
+. if r.status == 200 or not HANDLE_ERRORS:
+. return r
+. else:
+. return self.parent.error('http', req, r, r.status, r.reason, r.msg)
+.
+.
+. def _reuse_connection(self, h, req, host):
+. """start the transaction with a re-used connection
+. return a response object (r) upon success or None on failure.
+. This DOES not close or remove bad connections in cases where
+. it returns. However, if an unexpected exception occurs, it
+. will close and remove the connection before re-raising.
+. """
+. try:
+. self._start_transaction(h, req)
+. r = h.getresponse()
+. # note: just because we got something back doesn't mean it
+. # worked. We'll check the version below, too.
+. except (socket.error, httplib.HTTPException):
+. r = None
+. except:
+. # adding this block just in case we've missed
+. # something we will still raise the exception, but
+. # lets try and close the connection and remove it
+. # first. We previously got into a nasty loop
+. # where an exception was uncaught, and so the
+. # connection stayed open. On the next try, the
+. # same exception was raised, etc. The tradeoff is
+. # that it's now possible this call will raise
+. # a DIFFERENT exception
+[ 1
+. if DEBUG: DBPRINT("unexpected exception - " \
+. "closing connection to %s (%d)" % (host, id(h)))
+] 1
+{ 1
+. if DEBUG: DEBUG.error("unexpected exception - closing " + \
+. "connection to %s (%d)", host, id(h))
+}
+. self._cm.remove(h)
+. h.close()
+. raise
+.
+. if r is None or r.version == 9:
+. # httplib falls back to assuming HTTP 0.9 if it gets a
+. # bad header back. This is most likely to happen if
+. # the socket has been closed by the server since we
+. # last used the connection.
+[ 1
+. if DEBUG: DBPRINT("failed to re-use connection to %s (%d)" \
+. % (host, id(h)))
+] 1
+{ 1
+. if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
+. host, id(h))
+}
+. r = None
+. else:
+[ 1
+. if DEBUG: DBPRINT("re-using connection to %s (%d)" % (host, id(h)))
+] 1
+{ 1
+. if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
+}
+.
+. return r
+.
+. def _start_transaction(self, h, req):
+. try:
+. if req.has_data():
+. data = req.get_data()
+. h.putrequest('POST', req.get_selector())
+. if not req.headers.has_key('Content-type'):
+. h.putheader('Content-type',
+. 'application/x-www-form-urlencoded')
+. if not req.headers.has_key('Content-length'):
+. h.putheader('Content-length', '%d' % len(data))
+. else:
+. h.putrequest('GET', req.get_selector())
+. except (socket.error, httplib.HTTPException), err:
+. raise urllib2.URLError(err)
+.
+. for args in self.parent.addheaders:
+. h.putheader(*args)
+. for k, v in req.headers.items():
+. h.putheader(k, v)
+. h.endheaders()
+. if req.has_data():
+. h.send(data)
+.
+. class HTTPResponse(httplib.HTTPResponse):
+. # we need to subclass HTTPResponse in order to
+. # 1) add readline() and readlines() methods
+. # 2) add close_connection() methods
+. # 3) add info() and geturl() methods
+.
+. # in order to add readline(), read must be modified to deal with a
+. # buffer. example: readline must read a buffer and then spit back
+. # one line at a time. The only real alternative is to read one
+. # BYTE at a time (ick). Once something has been read, it can't be
+. # put back (ok, maybe it can, but that's even uglier than this),
+. # so if you THEN do a normal read, you must first take stuff from
+. # the buffer.
+.
+. # the read method wraps the original to accomodate buffering,
+. # although read() never adds to the buffer.
+. # Both readline and readlines have been stolen with almost no
+. # modification from socket.py
+.
+.
+. def __init__(self, sock, debuglevel=0, strict=0, method=None):
+. if method: # the httplib in python 2.3 uses the method arg
+. httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
+. else: # 2.2 doesn't
+. httplib.HTTPResponse.__init__(self, sock, debuglevel)
+. self.fileno = sock.fileno
+. self.code = None
+. self._rbuf = ''
+. self._rbufsize = 8096
+. self._handler = None # inserted by the handler later
+. self._host = None # (same)
+. self._url = None # (same)
+. self._connection = None # (same)
+.
+. _raw_read = httplib.HTTPResponse.read
+.
+. def close(self):
+. if self.fp:
+. self.fp.close()
+. self.fp = None
+. if self._handler:
+. self._handler._request_closed(self, self._host,
+. self._connection)
+.
+. def close_connection(self):
+. self._handler._remove_connection(self._host, self._connection, close=1)
+. self.close()
+.
+. def info(self):
+. return self.msg
+.
+. def geturl(self):
+. return self._url
+.
+. def read(self, amt=None):
+. # the _rbuf test is only in this first if for speed. It's not
+. # logically necessary
+. if self._rbuf and not amt is None:
+. L = len(self._rbuf)
+. if amt > L:
+. amt -= L
+. else:
+. s = self._rbuf[:amt]
+. self._rbuf = self._rbuf[amt:]
+. return s
+.
+. s = self._rbuf + self._raw_read(amt)
+. self._rbuf = ''
+. return s
+.
+. def readline(self, limit=-1):
+. data = ""
+. i = self._rbuf.find('\n')
+. while i < 0 and not (0 < limit <= len(self._rbuf)):
+. new = self._raw_read(self._rbufsize)
+. if not new: break
+. i = new.find('\n')
+. if i >= 0: i = i + len(self._rbuf)
+. self._rbuf = self._rbuf + new
+. if i < 0: i = len(self._rbuf)
+. else: i = i+1
+. if 0 <= limit < len(self._rbuf): i = limit
+. data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
+. return data
+.
+. def readlines(self, sizehint = 0):
+. total = 0
+. list = []
+. while 1:
+. line = self.readline()
+. if not line: break
+. list.append(line)
+. total += len(line)
+. if sizehint and total >= sizehint:
+. break
+. return list
+.
+.
+. class HTTPConnection(httplib.HTTPConnection):
+. # use the modified response class
+. response_class = HTTPResponse
+.
+. #########################################################################
+. ##### TEST FUNCTIONS
+. #########################################################################
+.
+. def error_handler(url):
+. global HANDLE_ERRORS
+. orig = HANDLE_ERRORS
+. keepalive_handler = HTTPHandler()
+. opener = urllib2.build_opener(keepalive_handler)
+. urllib2.install_opener(opener)
+. pos = {0: 'off', 1: 'on'}
+. for i in (0, 1):
+. print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
+. HANDLE_ERRORS = i
+. try:
+. fo = urllib2.urlopen(url)
+. foo = fo.read()
+. fo.close()
+. try: status, reason = fo.status, fo.reason
+. except AttributeError: status, reason = None, None
+. except IOError, e:
+. print " EXCEPTION: %s" % e
+. raise
+. else:
+. print " status = %s, reason = %s" % (status, reason)
+. HANDLE_ERRORS = orig
+. hosts = keepalive_handler.open_connections()
+. print "open connections:", hosts
+. keepalive_handler.close_all()
+.
+. def continuity(url):
+. import md5
+. format = '%25s: %s'
+.
+. # first fetch the file with the normal http handler
+. opener = urllib2.build_opener()
+. urllib2.install_opener(opener)
+. fo = urllib2.urlopen(url)
+. foo = fo.read()
+. fo.close()
+. m = md5.new(foo)
+. print format % ('normal urllib', m.hexdigest())
+.
+. # now install the keepalive handler and try again
+. opener = urllib2.build_opener(HTTPHandler())
+. urllib2.install_opener(opener)
+.
+. fo = urllib2.urlopen(url)
+. foo = fo.read()
+. fo.close()
+. m = md5.new(foo)
+. print format % ('keepalive read', m.hexdigest())
+.
+. fo = urllib2.urlopen(url)
+. foo = ''
+. while 1:
+. f = fo.readline()
+. if f: foo = foo + f
+. else: break
+. fo.close()
+. m = md5.new(foo)
+. print format % ('keepalive readline', m.hexdigest())
+.
+. def comp(N, url):
+. print ' making %i connections to:\n %s' % (N, url)
+.
+. sys.stdout.write(' first using the normal urllib handlers')
+. # first use normal opener
+. opener = urllib2.build_opener()
+. urllib2.install_opener(opener)
+. t1 = fetch(N, url)
+. print ' TIME: %.3f s' % t1
+.
+. sys.stdout.write(' now using the keepalive handler ')
+. # now install the keepalive handler and try again
+. opener = urllib2.build_opener(HTTPHandler())
+. urllib2.install_opener(opener)
+. t2 = fetch(N, url)
+. print ' TIME: %.3f s' % t2
+. print ' improvement factor: %.2f' % (t1/t2, )
+.
+. def fetch(N, url, delay=0):
+{ 1
+. import time
+}
+. lens = []
+. starttime = time.time()
+. for i in range(N):
+. if delay and i > 0: time.sleep(delay)
+. fo = urllib2.urlopen(url)
+. foo = fo.read()
+. fo.close()
+. lens.append(len(foo))
+. diff = time.time() - starttime
+.
+. j = 0
+. for i in lens[1:]:
+. j = j + 1
+. if not i == lens[0]:
+. print "WARNING: inconsistent length on read %i: %i" % (j, i)
+.
+. return diff
+.
+. def test_timeout(url):
+[ 1
+. global DEBUG, DBPRINT
+. dbp = DBPRINT
+. def DBPRINT(*args): print ' ' + ' '.join(args)
+. DEBUG=1
+] 1
+{ 1
+. global DEBUG
+. dbbackup = DEBUG
+. class FakeLogger:
+. def debug(self, msg, *args): print msg % args
+. info = warning = error = debug
+. DEBUG = FakeLogger()
+}
+. print " fetching the file to establish a connection"
+. fo = urllib2.urlopen(url)
+. data1 = fo.read()
+. fo.close()
+.
+. i = 20
+. print " waiting %i seconds for the server to close the connection" % i
+. while i > 0:
+. sys.stdout.write('\r %2i' % i)
+. sys.stdout.flush()
+. time.sleep(1)
+. i -= 1
+. sys.stderr.write('\r')
+.
+. print " fetching the file a second time"
+. fo = urllib2.urlopen(url)
+. data2 = fo.read()
+. fo.close()
+.
+. if data1 == data2:
+. print ' data are identical'
+. else:
+. print ' ERROR: DATA DIFFER'
+.
+[ 1
+. DEBUG=0
+. DBPRINT = dbp
+] 1
+{ 1
+. DEBUG = dbbackup
+}
+.
+.
+. def test(url, N=10):
+. print "checking error hander (do this on a non-200)"
+. try: error_handler(url)
+. except IOError, e:
+. print "exiting - exception will prevent further tests"
+. sys.exit()
+. print
+. print "performing continuity test (making sure stuff isn't corrupted)"
+. continuity(url)
+. print
+. print "performing speed comparison"
+. comp(N, url)
+. print
+. print "performing dropped-connection check"
+. test_timeout(url)
+.
+. if __name__ == '__main__':
+. import time
+. import sys
+. try:
+. N = int(sys.argv[1])
+. url = sys.argv[2]
+. except:
+. print "%s " % sys.argv[0]
+. else:
+. test(url, N)
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/c2/mirror.py-20051228065045-fc444a5a0fecb96b.weave
+++ urlgrabber-2.9.7/.bzr/weaves/c2/mirror.py-20051228065045-fc444a5a0fecb96b.weave
@@ -0,0 +1,504 @@
+# bzr weave file v5
+i
+1 0a3f40f14d0ebe33e66a13e811e69666c1b70656
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+i 0
+1 b0794d4a08890e14c9ad46f2c06ad1a13ef56c08
+n wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
+
+w
+{ 0
+. # This library is free software; you can redistribute it and/or
+. # modify it under the terms of the GNU Lesser General Public
+. # License as published by the Free Software Foundation; either
+. # version 2.1 of the License, or (at your option) any later version.
+. #
+. # This library is distributed in the hope that it will be useful,
+. # but WITHOUT ANY WARRANTY; without even the implied warranty of
+. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+. # Lesser General Public License for more details.
+. #
+. # You should have received a copy of the GNU Lesser General Public
+. # License along with this library; if not, write to the
+. # Free Software Foundation, Inc.,
+. # 59 Temple Place, Suite 330,
+. # Boston, MA 02111-1307 USA
+.
+. # This file is part of urlgrabber, a high-level cross-protocol url-grabber
+. # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+.
+. """Module for downloading files from a pool of mirrors
+.
+. DESCRIPTION
+.
+. This module provides support for downloading files from a pool of
+. mirrors with configurable failover policies. To a large extent, the
+. failover policy is chosen by using different classes derived from
+. the main class, MirrorGroup.
+.
+. Instances of MirrorGroup (and cousins) act very much like URLGrabber
+. instances in that they have urlread, urlgrab, and urlopen methods.
+. They can therefore, be used in very similar ways.
+.
+. from urlgrabber.grabber import URLGrabber
+. from urlgrabber.mirror import MirrorGroup
+. gr = URLGrabber()
+. mg = MirrorGroup(gr, ['http://foo.com/some/directory/',
+. 'http://bar.org/maybe/somewhere/else/',
+. 'ftp://baz.net/some/other/place/entirely/']
+. mg.urlgrab('relative/path.zip')
+.
+. The assumption is that all mirrors are identical AFTER the base urls
+. specified, so that any mirror can be used to fetch any file.
+.
+. FAILOVER
+.
+. The failover mechanism is designed to be customized by subclassing
+. from MirrorGroup to change the details of the behavior. In general,
+. the classes maintain a master mirror list and a "current mirror"
+. index. When a download is initiated, a copy of this list and index
+. is created for that download only. The specific failover policy
+. depends on the class used, and so is documented in the class
+. documentation. Note that ANY behavior of the class can be
+. overridden, so any failover policy at all is possible (although
+. you may need to change the interface in extreme cases).
+.
+. CUSTOMIZATION
+.
+. Most customization of a MirrorGroup object is done at instantiation
+. time (or via subclassing). There are four major types of
+. customization:
+.
+. 1) Pass in a custom urlgrabber - The passed in urlgrabber will be
+. used (by default... see #2) for the grabs, so options to it
+. apply for the url-fetching
+.
+. 2) Custom mirror list - Mirror lists can simply be a list of
+. stings mirrors (as shown in the example above) but each can
+. also be a dict, allowing for more options. For example, the
+. first mirror in the list above could also have been:
+.
+. {'mirror': 'http://foo.com/some/directory/',
+. 'grabber': ,
+. 'kwargs': { }}
+.
+. All mirrors are converted to this format internally. If
+. 'grabber' is omitted, the default grabber will be used. If
+. kwargs are omitted, then (duh) they will not be used.
+.
+. 3) Pass keyword arguments when instantiating the mirror group.
+. See, for example, the failure_callback argument.
+.
+. 4) Finally, any kwargs passed in for the specific file (to the
+. urlgrab method, for example) will be folded in. The options
+. passed into the grabber's urlXXX methods will override any
+. options specified in a custom mirror dict.
+.
+. """
+.
+[ 1
+. # $Id: mirror.py,v 1.12 2004/09/07 21:19:54 mstenner Exp $
+] 1
+{ 1
+. # $Id: mirror.py,v 1.13 2005/10/22 21:57:28 mstenner Exp $
+}
+.
+. import random
+. import thread # needed for locking to make this threadsafe
+.
+[ 1
+. from grabber import URLGrabError, CallbackObject
+.
+. DEBUG=0
+. def DBPRINT(*args): print ' '.join(args)
+] 1
+{ 1
+. from grabber import URLGrabError, CallbackObject, DEBUG
+}
+.
+. try:
+. from i18n import _
+. except ImportError, msg:
+. def _(st): return st
+.
+. class GrabRequest:
+. """This is a dummy class used to hold information about the specific
+. request. For example, a single file. By maintaining this information
+. separately, we can accomplish two things:
+.
+. 1) make it a little easier to be threadsafe
+. 2) have request-specific parameters
+. """
+. pass
+.
+. class MirrorGroup:
+. """Base Mirror class
+.
+. Instances of this class are built with a grabber object and a list
+. of mirrors. Then all calls to urlXXX should be passed relative urls.
+. The requested file will be searched for on the first mirror. If the
+. grabber raises an exception (possibly after some retries) then that
+. mirror will be removed from the list, and the next will be attempted.
+. If all mirrors are exhausted, then an exception will be raised.
+.
+. MirrorGroup has the following failover policy:
+.
+. * downloads begin with the first mirror
+.
+. * by default (see default_action below) a failure (after retries)
+. causes it to increment the local AND master indices. Also,
+. the current mirror is removed from the local list (but NOT the
+. master list - the mirror can potentially be used for other
+. files)
+.
+. * if the local list is ever exhausted, a URLGrabError will be
+. raised (errno=256, no more mirrors)
+.
+. OPTIONS
+.
+. In addition to the required arguments "grabber" and "mirrors",
+. MirrorGroup also takes the following optional arguments:
+.
+. default_action
+.
+. A dict that describes the actions to be taken upon failure
+. (after retries). default_action can contain any of the
+. following keys (shown here with their default values):
+.
+. default_action = {'increment': 1,
+. 'increment_master': 1,
+. 'remove': 1,
+. 'remove_master': 0,
+. 'fail': 0}
+.
+. In this context, 'increment' means "use the next mirror" and
+. 'remove' means "never use this mirror again". The two
+. 'master' values refer to the instance-level mirror list (used
+. for all files), whereas the non-master values refer to the
+. current download only.
+.
+. The 'fail' option will cause immediate failure by re-raising
+. the exception and no further attempts to get the current
+. download.
+.
+. This dict can be set at instantiation time,
+. mg = MirrorGroup(grabber, mirrors, default_action={'fail':1})
+. at method-execution time (only applies to current fetch),
+. filename = mg.urlgrab(url, default_action={'increment': 0})
+. or by returning an action dict from the failure_callback
+. return {'fail':0}
+. in increasing precedence.
+.
+. If all three of these were done, the net result would be:
+. {'increment': 0, # set in method
+. 'increment_master': 1, # class default
+. 'remove': 1, # class default
+. 'remove_master': 0, # class default
+. 'fail': 0} # set at instantiation, reset
+. # from callback
+.
+. failure_callback
+.
+. this is a callback that will be called when a mirror "fails",
+. meaning the grabber raises some URLGrabError. If this is a
+. tuple, it is interpreted to be of the form (cb, args, kwargs)
+. where cb is the actual callable object (function, method,
+. etc). Otherwise, it is assumed to be the callable object
+. itself. The callback will be passed a grabber.CallbackObject
+. instance along with args and kwargs (if present). The following
+. attributes are defined withing the instance:
+.
+. obj.exception = < exception that was raised >
+. obj.mirror = < the mirror that was tried >
+. obj.relative_url = < url relative to the mirror >
+. obj.url = < full url that failed >
+. # .url is just the combination of .mirror
+. # and .relative_url
+.
+. The failure callback can return an action dict, as described
+. above.
+.
+. Like default_action, the failure_callback can be set at
+. instantiation time or when the urlXXX method is called. In
+. the latter case, it applies only for that fetch.
+.
+. The callback can re-raise the exception quite easily. For
+. example, this is a perfectly adequate callback function:
+.
+. def callback(obj): raise obj.exception
+.
+. WARNING: do not save the exception object (or the
+. CallbackObject instance). As they contain stack frame
+. references, they can lead to circular references.
+.
+. Notes:
+. * The behavior can be customized by deriving and overriding the
+. 'CONFIGURATION METHODS'
+. * The 'grabber' instance is kept as a reference, not copied.
+. Therefore, the grabber instance can be modified externally
+. and changes will take effect immediately.
+. """
+.
+. # notes on thread-safety:
+.
+. # A GrabRequest should never be shared by multiple threads because
+. # it's never saved inside the MG object and never returned outside it.
+. # therefore, it should be safe to access/modify grabrequest data
+. # without a lock. However, accessing the mirrors and _next attributes
+. # of the MG itself must be done when locked to prevent (for example)
+. # removal of the wrong mirror.
+.
+. ##############################################################
+. # CONFIGURATION METHODS - intended to be overridden to
+. # customize behavior
+. def __init__(self, grabber, mirrors, **kwargs):
+. """Initialize the MirrorGroup object.
+.
+. REQUIRED ARGUMENTS
+.
+. grabber - URLGrabber instance
+. mirrors - a list of mirrors
+.
+. OPTIONAL ARGUMENTS
+.
+. failure_callback - callback to be used when a mirror fails
+. default_action - dict of failure actions
+.
+. See the module-level and class level documentation for more
+. details.
+. """
+.
+. # OVERRIDE IDEAS:
+. # shuffle the list to randomize order
+. self.grabber = grabber
+. self.mirrors = self._parse_mirrors(mirrors)
+. self._next = 0
+. self._lock = thread.allocate_lock()
+. self.default_action = None
+. self._process_kwargs(kwargs)
+.
+. # if these values are found in **kwargs passed to one of the urlXXX
+. # methods, they will be stripped before getting passed on to the
+. # grabber
+. options = ['default_action', 'failure_callback']
+.
+. def _process_kwargs(self, kwargs):
+. self.failure_callback = kwargs.get('failure_callback')
+. self.default_action = kwargs.get('default_action')
+.
+. def _parse_mirrors(self, mirrors):
+. parsed_mirrors = []
+. for m in mirrors:
+. if type(m) == type(''): m = {'mirror': m}
+. parsed_mirrors.append(m)
+. return parsed_mirrors
+.
+. def _load_gr(self, gr):
+. # OVERRIDE IDEAS:
+. # shuffle gr list
+. self._lock.acquire()
+. gr.mirrors = list(self.mirrors)
+. gr._next = self._next
+. self._lock.release()
+.
+. def _get_mirror(self, gr):
+. # OVERRIDE IDEAS:
+. # return a random mirror so that multiple mirrors get used
+. # even without failures.
+. if not gr.mirrors:
+. raise URLGrabError(256, _('No more mirrors to try.'))
+. return gr.mirrors[gr._next]
+.
+. def _failure(self, gr, cb_obj):
+. # OVERRIDE IDEAS:
+. # inspect the error - remove=1 for 404, remove=2 for connection
+. # refused, etc. (this can also be done via
+. # the callback)
+. cb = gr.kw.get('failure_callback') or self.failure_callback
+. if cb:
+. if type(cb) == type( () ):
+. cb, args, kwargs = cb
+. else:
+. args, kwargs = (), {}
+. action = cb(cb_obj, *args, **kwargs) or {}
+. else:
+. action = {}
+. # XXXX - decide - there are two ways to do this
+. # the first is action-overriding as a whole - use the entire action
+. # or fall back on module level defaults
+. #action = action or gr.kw.get('default_action') or self.default_action
+. # the other is to fall through for each element in the action dict
+. a = dict(self.default_action or {})
+. a.update(gr.kw.get('default_action', {}))
+. a.update(action)
+. action = a
+. self.increment_mirror(gr, action)
+. if action and action.get('fail', 0): raise
+.
+. def increment_mirror(self, gr, action={}):
+. """Tell the mirror object increment the mirror index
+.
+. This increments the mirror index, which amounts to telling the
+. mirror object to use a different mirror (for this and future
+. downloads).
+.
+. This is a SEMI-public method. It will be called internally,
+. and you may never need to call it. However, it is provided
+. (and is made public) so that the calling program can increment
+. the mirror choice for methods like urlopen. For example, with
+. urlopen, there's no good way for the mirror group to know that
+. an error occurs mid-download (it's already returned and given
+. you the file object).
+.
+. remove --- can have several values
+. 0 do not remove the mirror from the list
+. 1 remove the mirror for this download only
+. 2 remove the mirror permanently
+.
+. beware of remove=0 as it can lead to infinite loops
+. """
+. badmirror = gr.mirrors[gr._next]
+.
+. self._lock.acquire()
+. try:
+. ind = self.mirrors.index(badmirror)
+. except ValueError:
+. pass
+. else:
+. if action.get('remove_master', 0):
+. del self.mirrors[ind]
+. elif self._next == ind and action.get('increment_master', 1):
+. self._next += 1
+. if self._next >= len(self.mirrors): self._next = 0
+. self._lock.release()
+.
+. if action.get('remove', 1):
+. del gr.mirrors[gr._next]
+. elif action.get('increment', 1):
+. gr._next += 1
+. if gr._next >= len(gr.mirrors): gr._next = 0
+.
+. if DEBUG:
+. grm = [m['mirror'] for m in gr.mirrors]
+[ 1
+. DBPRINT('GR mirrors: [%s] %i' % (' '.join(grm), gr._next))
+] 1
+{ 1
+. DEBUG.info('GR mirrors: [%s] %i', ' '.join(grm), gr._next)
+}
+. selfm = [m['mirror'] for m in self.mirrors]
+[ 1
+. DBPRINT('MAIN mirrors: [%s] %i' % (' '.join(selfm), self._next))
+] 1
+{ 1
+. DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next)
+}
+.
+. #####################################################################
+. # NON-CONFIGURATION METHODS
+. # these methods are designed to be largely workhorse methods that
+. # are not intended to be overridden. That doesn't mean you can't;
+. # if you want to, feel free, but most things can be done by
+. # by overriding the configuration methods :)
+.
+. def _join_url(self, base_url, rel_url):
+. if base_url.endswith('/') or rel_url.startswith('/'):
+. return base_url + rel_url
+. else:
+. return base_url + '/' + rel_url
+.
+. def _mirror_try(self, func, url, kw):
+. gr = GrabRequest()
+. gr.func = func
+. gr.url = url
+. gr.kw = dict(kw)
+. self._load_gr(gr)
+.
+. for k in self.options:
+. try: del kw[k]
+. except KeyError: pass
+.
+. while 1:
+. mirrorchoice = self._get_mirror(gr)
+. fullurl = self._join_url(mirrorchoice['mirror'], gr.url)
+. kwargs = dict(mirrorchoice.get('kwargs', {}))
+. kwargs.update(kw)
+. grabber = mirrorchoice.get('grabber') or self.grabber
+. func_ref = getattr(grabber, func)
+[ 1
+. if DEBUG: DBPRINT('MIRROR: trying %s -> %s' % (url, fullurl))
+] 1
+{ 1
+. if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
+}
+. try:
+. return func_ref( *(fullurl,), **kwargs )
+. except URLGrabError, e:
+[ 1
+. if DEBUG: DBPRINT('MIRROR: failed')
+] 1
+{ 1
+. if DEBUG: DEBUG.info('MIRROR: failed')
+}
+. obj = CallbackObject()
+. obj.exception = e
+. obj.mirror = mirrorchoice['mirror']
+. obj.relative_url = gr.url
+. obj.url = fullurl
+. self._failure(gr, obj)
+.
+. def urlgrab(self, url, filename=None, **kwargs):
+. kw = dict(kwargs)
+. kw['filename'] = filename
+. func = 'urlgrab'
+. return self._mirror_try(func, url, kw)
+.
+. def urlopen(self, url, **kwargs):
+. kw = dict(kwargs)
+. func = 'urlopen'
+. return self._mirror_try(func, url, kw)
+.
+. def urlread(self, url, limit=None, **kwargs):
+. kw = dict(kwargs)
+. kw['limit'] = limit
+. func = 'urlread'
+. return self._mirror_try(func, url, kw)
+.
+.
+. class MGRandomStart(MirrorGroup):
+. """A mirror group that starts at a random mirror in the list.
+.
+. This behavior of this class is identical to MirrorGroup, except that
+. it starts at a random location in the mirror list.
+. """
+.
+. def __init__(self, grabber, mirrors, **kwargs):
+. """Initialize the object
+.
+. The arguments for intialization are the same as for MirrorGroup
+. """
+. MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
+. self._next = random.randrange(len(mirrors))
+.
+. class MGRandomOrder(MirrorGroup):
+. """A mirror group that uses mirrors in a random order.
+.
+. This behavior of this class is identical to MirrorGroup, except that
+. it uses the mirrors in a random order. Note that the order is set at
+. initialization time and fixed thereafter. That is, it does not pick a
+. random mirror after each failure.
+. """
+.
+. def __init__(self, grabber, mirrors, **kwargs):
+. """Initialize the object
+.
+. The arguments for intialization are the same as for MirrorGroup
+. """
+. MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
+. random.shuffle(self.mirrors)
+.
+. if __name__ == '__main__':
+. pass
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/e3/progress.py-20051228065045-3351eb30398ed9a7.weave
+++ urlgrabber-2.9.7/.bzr/weaves/e3/progress.py-20051228065045-3351eb30398ed9a7.weave
@@ -0,0 +1,566 @@
+# bzr weave file v5
+i
+1 77bb03af8a544378af253ce0c2c071fe495fb5a5
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+i 0
+1 1d5af0fa8fa5cefa14f0886a0360fafa087dca1a
+n wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
+
+w
+{ 0
+. # This library is free software; you can redistribute it and/or
+. # modify it under the terms of the GNU Lesser General Public
+. # License as published by the Free Software Foundation; either
+. # version 2.1 of the License, or (at your option) any later version.
+. #
+. # This library is distributed in the hope that it will be useful,
+. # but WITHOUT ANY WARRANTY; without even the implied warranty of
+. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+. # Lesser General Public License for more details.
+. #
+. # You should have received a copy of the GNU Lesser General Public
+. # License along with this library; if not, write to the
+. # Free Software Foundation, Inc.,
+. # 59 Temple Place, Suite 330,
+. # Boston, MA 02111-1307 USA
+.
+. # This file is part of urlgrabber, a high-level cross-protocol url-grabber
+. # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+.
+[ 1
+. # $Id: progress.py,v 1.5 2005/01/14 18:21:41 rtomayko Exp $
+] 1
+{ 1
+. # $Id: progress.py,v 1.7 2005/08/19 21:59:07 mstenner Exp $
+}
+.
+. import sys
+. import time
+. import math
+. import thread
+.
+. class BaseMeter:
+. def __init__(self):
+. self.update_period = 0.3 # seconds
+.
+. self.filename = None
+. self.url = None
+. self.basename = None
+. self.text = None
+. self.size = None
+. self.start_time = None
+. self.last_amount_read = 0
+. self.last_update_time = None
+. self.re = RateEstimator()
+.
+. def start(self, filename=None, url=None, basename=None,
+. size=None, now=None, text=None):
+. self.filename = filename
+. self.url = url
+. self.basename = basename
+. self.text = text
+.
+. #size = None ######### TESTING
+. self.size = size
+. if not size is None: self.fsize = format_number(size) + 'B'
+.
+. if now is None: now = time.time()
+. self.start_time = now
+. self.re.start(size, now)
+. self.last_amount_read = 0
+. self.last_update_time = now
+. self._do_start(now)
+.
+. def _do_start(self, now=None):
+. pass
+.
+. def update(self, amount_read, now=None):
+. # for a real gui, you probably want to override and put a call
+. # to your mainloop iteration function here
+. if now is None: now = time.time()
+. if (now >= self.last_update_time + self.update_period) or \
+. not self.last_update_time:
+. self.re.update(amount_read, now)
+. self.last_amount_read = amount_read
+. self.last_update_time = now
+. self._do_update(amount_read, now)
+.
+. def _do_update(self, amount_read, now=None):
+. pass
+.
+. def end(self, amount_read, now=None):
+. if now is None: now = time.time()
+. self.re.update(amount_read, now)
+. self.last_amount_read = amount_read
+. self.last_update_time = now
+. self._do_end(amount_read, now)
+.
+. def _do_end(self, amount_read, now=None):
+. pass
+.
+. class TextMeter(BaseMeter):
+. def __init__(self, fo=sys.stderr):
+. BaseMeter.__init__(self)
+. self.fo = fo
+.
+. def _do_update(self, amount_read, now=None):
+. etime = self.re.elapsed_time()
+. fetime = format_time(etime)
+. fread = format_number(amount_read)
+. #self.size = None
+. if self.text is not None:
+. text = self.text
+. else:
+. text = self.basename
+. if self.size is None:
+. out = '\r%-60.60s %5sB %s ' % \
+. (text, fread, fetime)
+. else:
+. rtime = self.re.remaining_time()
+. frtime = format_time(rtime)
+. frac = self.re.fraction_read()
+. bar = '='*int(25 * frac)
+.
+. out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ETA ' % \
+. (text, frac*100, bar, fread, frtime)
+.
+. self.fo.write(out)
+. self.fo.flush()
+.
+. def _do_end(self, amount_read, now=None):
+. total_time = format_time(self.re.elapsed_time())
+. total_size = format_number(amount_read)
+. if self.text is not None:
+. text = self.text
+. else:
+. text = self.basename
+. if self.size is None:
+. out = '\r%-60.60s %5sB %s ' % \
+. (text, total_size, total_time)
+. else:
+. bar = '='*25
+. out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ' % \
+. (text, 100, bar, total_size, total_time)
+. self.fo.write(out + '\n')
+. self.fo.flush()
+.
+. text_progress_meter = TextMeter
+.
+. class MultiFileHelper(BaseMeter):
+. def __init__(self, master):
+. BaseMeter.__init__(self)
+. self.master = master
+.
+. def _do_start(self, now):
+. self.master.start_meter(self, now)
+.
+. def _do_update(self, amount_read, now):
+. # elapsed time since last update
+. self.master.update_meter(self, now)
+.
+. def _do_end(self, amount_read, now):
+. self.ftotal_time = format_time(now - self.start_time)
+. self.ftotal_size = format_number(self.last_amount_read)
+. self.master.end_meter(self, now)
+.
+. def failure(self, message, now=None):
+. self.master.failure_meter(self, message, now)
+.
+. def message(self, message):
+. self.master.message_meter(self, message)
+.
+. class MultiFileMeter:
+. helperclass = MultiFileHelper
+. def __init__(self):
+. self.meters = []
+. self.in_progress_meters = []
+. self._lock = thread.allocate_lock()
+. self.update_period = 0.3 # seconds
+.
+. self.numfiles = None
+. self.finished_files = 0
+. self.failed_files = 0
+. self.open_files = 0
+. self.total_size = None
+. self.failed_size = 0
+. self.start_time = None
+. self.finished_file_size = 0
+. self.last_update_time = None
+. self.re = RateEstimator()
+.
+. def start(self, numfiles=None, total_size=None, now=None):
+. if now is None: now = time.time()
+. self.numfiles = numfiles
+. self.finished_files = 0
+. self.failed_files = 0
+. self.open_files = 0
+. self.total_size = total_size
+. self.failed_size = 0
+. self.start_time = now
+. self.finished_file_size = 0
+. self.last_update_time = now
+. self.re.start(total_size, now)
+. self._do_start(now)
+.
+. def _do_start(self, now):
+. pass
+.
+. def end(self, now=None):
+. if now is None: now = time.time()
+. self._do_end(now)
+.
+. def _do_end(self, now):
+. pass
+.
+. def lock(self): self._lock.acquire()
+. def unlock(self): self._lock.release()
+.
+. ###########################################################
+. # child meter creation and destruction
+. def newMeter(self):
+. newmeter = self.helperclass(self)
+. self.meters.append(newmeter)
+. return newmeter
+.
+. def removeMeter(self, meter):
+. self.meters.remove(meter)
+.
+. ###########################################################
+. # child functions - these should only be called by helpers
+. def start_meter(self, meter, now):
+. if not meter in self.meters:
+. raise ValueError('attempt to use orphaned meter')
+. self._lock.acquire()
+. try:
+. if not meter in self.in_progress_meters:
+. self.in_progress_meters.append(meter)
+. self.open_files += 1
+. finally:
+. self._lock.release()
+. self._do_start_meter(meter, now)
+.
+. def _do_start_meter(self, meter, now):
+. pass
+.
+. def update_meter(self, meter, now):
+. if not meter in self.meters:
+. raise ValueError('attempt to use orphaned meter')
+. if (now >= self.last_update_time + self.update_period) or \
+. not self.last_update_time:
+. self.re.update(self._amount_read(), now)
+. self.last_update_time = now
+. self._do_update_meter(meter, now)
+.
+. def _do_update_meter(self, meter, now):
+. pass
+.
+. def end_meter(self, meter, now):
+. if not meter in self.meters:
+. raise ValueError('attempt to use orphaned meter')
+. self._lock.acquire()
+. try:
+. try: self.in_progress_meters.remove(meter)
+. except ValueError: pass
+. self.open_files -= 1
+. self.finished_files += 1
+. self.finished_file_size += meter.last_amount_read
+. finally:
+. self._lock.release()
+. self._do_end_meter(meter, now)
+.
+. def _do_end_meter(self, meter, now):
+. pass
+.
+. def failure_meter(self, meter, message, now):
+. if not meter in self.meters:
+. raise ValueError('attempt to use orphaned meter')
+. self._lock.acquire()
+. try:
+. try: self.in_progress_meters.remove(meter)
+. except ValueError: pass
+. self.open_files -= 1
+. self.failed_files += 1
+. if meter.size and self.failed_size is not None:
+. self.failed_size += meter.size
+. else:
+. self.failed_size = None
+. finally:
+. self._lock.release()
+. self._do_failure_meter(meter, message, now)
+.
+. def _do_failure_meter(self, meter, message, now):
+. pass
+.
+. def message_meter(self, meter, message):
+. pass
+.
+. ########################################################
+. # internal functions
+. def _amount_read(self):
+. tot = self.finished_file_size
+. for m in self.in_progress_meters:
+. tot += m.last_amount_read
+. return tot
+.
+.
+. class TextMultiFileMeter(MultiFileMeter):
+. def __init__(self, fo=sys.stderr):
+. self.fo = fo
+. MultiFileMeter.__init__(self)
+.
+. # files: ###/### ###% data: ######/###### ###% time: ##:##:##/##:##:##
+. def _do_update_meter(self, meter, now):
+. self._lock.acquire()
+. try:
+. format = "files: %3i/%-3i %3i%% data: %6.6s/%-6.6s %3i%% " \
+. "time: %8.8s/%8.8s"
+. df = self.finished_files
+. tf = self.numfiles or 1
+. pf = 100 * float(df)/tf + 0.49
+. dd = self.re.last_amount_read
+. td = self.total_size
+. pd = 100 * (self.re.fraction_read() or 0) + 0.49
+. dt = self.re.elapsed_time()
+. rt = self.re.remaining_time()
+. if rt is None: tt = None
+. else: tt = dt + rt
+.
+. fdd = format_number(dd) + 'B'
+. ftd = format_number(td) + 'B'
+. fdt = format_time(dt, 1)
+. ftt = format_time(tt, 1)
+.
+. out = '%-79.79s' % (format % (df, tf, pf, fdd, ftd, pd, fdt, ftt))
+. self.fo.write('\r' + out)
+. self.fo.flush()
+. finally:
+. self._lock.release()
+.
+. def _do_end_meter(self, meter, now):
+. self._lock.acquire()
+. try:
+. format = "%-30.30s %6.6s %8.8s %9.9s"
+. fn = meter.basename
+. size = meter.last_amount_read
+. fsize = format_number(size) + 'B'
+. et = meter.re.elapsed_time()
+. fet = format_time(et, 1)
+. frate = format_number(size / et) + 'B/s'
+.
+. out = '%-79.79s' % (format % (fn, fsize, fet, frate))
+. self.fo.write('\r' + out + '\n')
+. finally:
+. self._lock.release()
+. self._do_update_meter(meter, now)
+.
+. def _do_failure_meter(self, meter, message, now):
+. self._lock.acquire()
+. try:
+. format = "%-30.30s %6.6s %s"
+. fn = meter.basename
+. if type(message) in (type(''), type(u'')):
+. message = message.splitlines()
+. if not message: message = ['']
+. out = '%-79s' % (format % (fn, 'FAILED', message[0] or ''))
+. self.fo.write('\r' + out + '\n')
+. for m in message[1:]: self.fo.write(' ' + m + '\n')
+. self._lock.release()
+. finally:
+. self._do_update_meter(meter, now)
+.
+. def message_meter(self, meter, message):
+. self._lock.acquire()
+. try:
+. pass
+. finally:
+. self._lock.release()
+.
+. def _do_end(self, now):
+. self._do_update_meter(None, now)
+. self._lock.acquire()
+. try:
+. self.fo.write('\n')
+. self.fo.flush()
+. finally:
+. self._lock.release()
+.
+. ######################################################################
+. # support classes and functions
+.
+. class RateEstimator:
+. def __init__(self, timescale=5.0):
+. self.timescale = timescale
+.
+. def start(self, total=None, now=None):
+. if now is None: now = time.time()
+. self.total = total
+. self.start_time = now
+. self.last_update_time = now
+. self.last_amount_read = 0
+. self.ave_rate = None
+.
+. def update(self, amount_read, now=None):
+. if now is None: now = time.time()
+. if amount_read == 0:
+. # if we just started this file, all bets are off
+. self.last_update_time = now
+. self.last_amount_read = 0
+. self.ave_rate = None
+. return
+.
+. #print 'times', now, self.last_update_time
+. time_diff = now - self.last_update_time
+. read_diff = amount_read - self.last_amount_read
+. self.last_update_time = now
+. self.last_amount_read = amount_read
+. self.ave_rate = self._temporal_rolling_ave(\
+. time_diff, read_diff, self.ave_rate, self.timescale)
+. #print 'results', time_diff, read_diff, self.ave_rate
+.
+. #####################################################################
+. # result methods
+. def average_rate(self):
+. "get the average transfer rate (in bytes/second)"
+. return self.ave_rate
+.
+. def elapsed_time(self):
+. "the time between the start of the transfer and the most recent update"
+. return self.last_update_time - self.start_time
+.
+. def remaining_time(self):
+. "estimated time remaining"
+. if not self.ave_rate or not self.total: return None
+. return (self.total - self.last_amount_read) / self.ave_rate
+.
+. def fraction_read(self):
+. """the fraction of the data that has been read
+. (can be None for unknown transfer size)"""
+. if self.total is None: return None
+. elif self.total == 0: return 1.0
+. else: return float(self.last_amount_read)/self.total
+.
+. #########################################################################
+. # support methods
+. def _temporal_rolling_ave(self, time_diff, read_diff, last_ave, timescale):
+. """a temporal rolling average performs smooth averaging even when
+. updates come at irregular intervals. This is performed by scaling
+. the "epsilon" according to the time since the last update.
+. Specifically, epsilon = time_diff / timescale
+.
+. As a general rule, the average will take on a completely new value
+. after 'timescale' seconds."""
+. epsilon = time_diff / timescale
+. if epsilon > 1: epsilon = 1.0
+. return self._rolling_ave(time_diff, read_diff, last_ave, epsilon)
+.
+. def _rolling_ave(self, time_diff, read_diff, last_ave, epsilon):
+. """perform a "rolling average" iteration
+. a rolling average "folds" new data into an existing average with
+. some weight, epsilon. epsilon must be between 0.0 and 1.0 (inclusive)
+. a value of 0.0 means only the old value (initial value) counts,
+. and a value of 1.0 means only the newest value is considered."""
+.
+. try:
+. recent_rate = read_diff / time_diff
+. except ZeroDivisionError:
+. recent_rate = None
+. if last_ave is None: return recent_rate
+. elif recent_rate is None: return last_ave
+.
+. # at this point, both last_ave and recent_rate are numbers
+. return epsilon * recent_rate + (1 - epsilon) * last_ave
+.
+. def _round_remaining_time(self, rt, start_time=15.0):
+. """round the remaining time, depending on its size
+. If rt is between n*start_time and (n+1)*start_time round downward
+. to the nearest multiple of n (for any counting number n).
+. If rt < start_time, round down to the nearest 1.
+. For example (for start_time = 15.0):
+. 2.7 -> 2.0
+. 25.2 -> 25.0
+. 26.4 -> 26.0
+. 35.3 -> 34.0
+. 63.6 -> 60.0
+. """
+.
+. if rt < 0: return 0.0
+. shift = int(math.log(rt/start_time)/math.log(2))
+. rt = int(rt)
+. if shift <= 0: return rt
+. return float(int(rt) >> shift << shift)
+.
+.
+. def format_time(seconds, use_hours=0):
+. if seconds is None or seconds < 0:
+. if use_hours: return '--:--:--'
+. else: return '--:--'
+. else:
+. seconds = int(seconds)
+. minutes = seconds / 60
+. seconds = seconds % 60
+. if use_hours:
+. hours = minutes / 60
+. minutes = minutes % 60
+. return '%02i:%02i:%02i' % (hours, minutes, seconds)
+. else:
+. return '%02i:%02i' % (minutes, seconds)
+.
+. def format_number(number, SI=0, space=' '):
+. """Turn numbers into human-readable metric-like numbers"""
+. symbols = ['', # (none)
+. 'k', # kilo
+. 'M', # mega
+. 'G', # giga
+. 'T', # tera
+. 'P', # peta
+. 'E', # exa
+. 'Z', # zetta
+. 'Y'] # yotta
+.
+. if SI: step = 1000.0
+. else: step = 1024.0
+.
+. thresh = 999
+. depth = 0
+{ 1
+. max_depth = len(symbols) - 1
+}
+.
+[ 1
+. # we want numbers between
+. while number > thresh:
+] 1
+{ 1
+. # we want numbers between 0 and thresh, but don't exceed the length
+. # of our list. In that event, the formatting will be screwed up,
+. # but it'll still show the right number.
+. while number > thresh and depth < max_depth:
+}
+. depth = depth + 1
+. number = number / step
+[ 1
+.
+. # just in case someone needs more than 1000 yottabytes!
+. diff = depth - len(symbols) + 1
+. if diff > 0:
+. depth = depth - diff
+. number = number * thresh**depth
+] 1
+.
+. if type(number) == type(1) or type(number) == type(1L):
+{ 1
+. # it's an int or a long, which means it didn't get divided,
+. # which means it's already short enough
+}
+. format = '%i%s%s'
+. elif number < 9.95:
+. # must use 9.95 for proper sizing. For example, 9.99 will be
+. # rounded to 10.0 with the .1f format string (which is too long)
+. format = '%.1f%s%s'
+. else:
+. format = '%.0f%s%s'
+.
+. return(format % (float(number or 0), space, symbols[depth]))
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/ea/changelog-20051231042924-dcf111a076d03915.weave
+++ urlgrabber-2.9.7/.bzr/weaves/ea/changelog-20051231042924-dcf111a076d03915.weave
@@ -0,0 +1,40 @@
+# bzr weave file v5
+i
+1 0c0e6caf1c2b9aa3895b5296a88ad427484a77c1
+n wildfire@progsoc.org-20051231042934-0a9043755ce47970
+
+i 0
+1 d2d51bd7c140b0d521387d8790474cdca5e9e2f7
+n wildfire@progsoc.org-20051231043436-2e0cbd0e13fec9b4
+
+i 1
+1 22e300e0868c123283bbca2313bffa07223fc9f5
+n wildfire@progsoc.org-20051231072726-04b5b3354a60ed0e
+
+w
+{ 0
+{ 1
+{ 2
+. urlgrabber (2.9.7-2) unstable; urgency=low
+.
+. * When I imported urlgrabber into bzr, I somehow lost a Build-Dep: on
+. python. Re-adding it so I can (Closes: #335340)
+.
+. -- Anand Kumria Sat, 31 Dec 2005 15:34:22 +1100
+.
+}
+. urlgrabber (2.9.7-1) unstable; urgency=low
+.
+. * New upstream release (Closes: #344934)
+.
+. -- Anand Kumria Sat, 31 Dec 2005 15:34:22 +1100
+.
+}
+. urlgrabber (2.9.6-1) unstable; urgency=low
+.
+. * Initial release (Closes: #312698)
+.
+. -- Anand Kumria Sun, 9 Oct 2005 13:06:55 +1000
+.
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/c9/compat-20051231042924-423c1341dd76a82b.weave
+++ urlgrabber-2.9.7/.bzr/weaves/c9/compat-20051231042924-423c1341dd76a82b.weave
@@ -0,0 +1,10 @@
+# bzr weave file v5
+i
+1 9c6b057a2b9d96a4067a749ee3b3b0158d390cf1
+n wildfire@progsoc.org-20051231042934-0a9043755ce47970
+
+w
+{ 0
+. 4
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/1c/control-20051231042924-90509ad6663230be.weave
+++ urlgrabber-2.9.7/.bzr/weaves/1c/control-20051231042924-90509ad6663230be.weave
@@ -0,0 +1,39 @@
+# bzr weave file v5
+i
+1 f2903ca48eef8e2eca6067aafb24f819c8af96b7
+n wildfire@progsoc.org-20051231042934-0a9043755ce47970
+
+i 0
+1 12acc8835fd4341c4da56147cf2f9ee66ea5d43b
+n wildfire@progsoc.org-20051231072726-04b5b3354a60ed0e
+
+w
+{ 0
+. Source: urlgrabber
+. Section: python
+. Priority: optional
+. Maintainer: Anand Kumria
+[ 1
+. Build-Depends: debhelper (>= 4.0.0), python2.3-dev
+] 1
+{ 1
+. Build-Depends: debhelper (>= 4.0.0), python2.3-dev, python
+}
+. Standards-Version: 3.6.2
+.
+. Package: python-urlgrabber
+. Architecture: any
+. Depends: ${shlibs:Depends}, ${misc:Depends}, ${python:Depends}
+. Description: A high-level cross-protocol url-grabber.
+. Python urlgrabber drastically simplifies the fetching of files. It is
+. designed to be used in programs that need common (but not necessarily
+. simple) url-fetching features.
+. .
+. It supports identical behavior for http://, ftp:// and file:/// URIs,
+. HTTP keepalive, byte ranges, regets, progress meters, throttling, retries,
+. access to authenticated http / ftp servers and proxies and the ability
+. to treat a list of mirrors as a single source automatically switching
+. mirrors if there is a failure.
+. .
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/56/dirs-20051231042924-e441be6cc2d04f50.weave
+++ urlgrabber-2.9.7/.bzr/weaves/56/dirs-20051231042924-e441be6cc2d04f50.weave
@@ -0,0 +1,11 @@
+# bzr weave file v5
+i
+1 4b103272a656ca3ff8da6fc341abb5478f1a0732
+n wildfire@progsoc.org-20051231042934-0a9043755ce47970
+
+w
+{ 0
+. usr/bin
+. usr/sbin
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/26/docs-20051231042924-1cddd720f701948c.weave
+++ urlgrabber-2.9.7/.bzr/weaves/26/docs-20051231042924-1cddd720f701948c.weave
@@ -0,0 +1,10 @@
+# bzr weave file v5
+i
+1 2494f9b4f745bc8470ca0e9634a892a3d652fb0e
+n wildfire@progsoc.org-20051231042934-0a9043755ce47970
+
+w
+{ 0
+. TODO
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/weaves/7e/rules-20051231042924-486916aea0d3709e.weave
+++ urlgrabber-2.9.7/.bzr/weaves/7e/rules-20051231042924-486916aea0d3709e.weave
@@ -0,0 +1,114 @@
+# bzr weave file v5
+i
+1 dadaf6c6b36d60d695160adb4efa7dbf2f1d6c29
+n wildfire@progsoc.org-20051231042934-0a9043755ce47970
+
+w
+{ 0
+. #!/usr/bin/make -f
+. # -*- makefile -*-
+. # Sample debian/rules that uses debhelper.
+. # This file was originally written by Joey Hess and Craig Small.
+. # As a special exception, when this file is copied by dh-make into a
+. # dh-make output file, you may use that output file without restriction.
+. # This special exception was added by Craig Small in version 0.37 of dh-make.
+.
+. # Uncomment this to turn on verbose mode.
+. #export DH_VERBOSE=1
+.
+.
+.
+.
+. CFLAGS = -Wall -g
+.
+. ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
+. CFLAGS += -O0
+. else
+. CFLAGS += -O2
+. endif
+.
+. configure: configure-stamp
+. configure-stamp:
+. dh_testdir
+. # Add here commands to configure the package.
+.
+. touch configure-stamp
+.
+.
+. build: build-stamp
+.
+. build-stamp: configure-stamp
+. dh_testdir
+.
+. # Add here commands to compile the package.
+. #$(MAKE)
+. python setup.py build
+. #docbook-to-man debian/urlgrabber.sgml > urlgrabber.1
+. python test/runtests.py
+.
+. touch build-stamp
+.
+. clean:
+. dh_testdir
+. dh_testroot
+. rm -f build-stamp configure-stamp
+.
+. # Add here commands to clean up after the build process.
+. #-$(MAKE) clean
+. python setup.py clean
+. find $(CURDIR) -name "*.pyc" -exec rm -f '{}' \;
+.
+. dh_clean
+.
+. install: build
+. dh_testdir
+. dh_testroot
+. dh_clean -k
+. dh_installdirs
+.
+. # Add here commands to install the package into debian/urlgrabber.
+. #$(MAKE) install DESTDIR=$(CURDIR)/debian/urlgrabber
+. python setup.py install --root=$(CURDIR)/debian/python-urlgrabber
+. #
+. rm -rf $(CURDIR)/debian/python-urlgrabber/usr/share/doc/urlgrabber-2.9.6
+.
+.
+. # Build architecture-independent files here.
+. binary-indep: build install
+. # We have nothing to do by default.
+.
+. # Build architecture-dependent files here.
+. binary-arch: build install
+. dh_testdir
+. dh_testroot
+. dh_installchangelogs ChangeLog
+. dh_installdocs
+. dh_installexamples
+. # dh_install
+. # dh_installmenu
+. # dh_installdebconf
+. # dh_installlogrotate
+. # dh_installemacsen
+. # dh_installpam
+. # dh_installmime
+. # dh_installinit
+. # dh_installcron
+. # dh_installinfo
+. dh_installman
+. dh_link
+. dh_strip
+. dh_compress
+. dh_fixperms
+. # dh_perl
+. dh_python
+. # dh_makeshlibs
+. dh_installdeb
+. dh_shlibdeps
+. dh_gencontrol
+. dh_md5sums
+. dh_builddeb
+.
+. binary: binary-indep binary-arch
+. .PHONY: build clean binary-indep binary-arch binary install configure
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/branch-format
+++ urlgrabber-2.9.7/.bzr/branch-format
@@ -0,0 +1 @@
+Bazaar-NG branch, format 6
--- urlgrabber-2.9.7.orig/.bzr/README
+++ urlgrabber-2.9.7/.bzr/README
@@ -0,0 +1,2 @@
+This is a Bazaar-NG control directory.
+Do not change any files in this directory.
--- urlgrabber-2.9.7.orig/.bzr/revision-history
+++ urlgrabber-2.9.7/.bzr/revision-history
@@ -0,0 +1,5 @@
+wildfire@progsoc.org-20051228065225-163b81689521a533
+wildfire@progsoc.org-20051231042934-0a9043755ce47970
+wildfire@progsoc.org-20051231043053-cf6e6cd84c74f297
+wildfire@progsoc.org-20051231043436-2e0cbd0e13fec9b4
+wildfire@progsoc.org-20051231072726-04b5b3354a60ed0e
\ No newline at end of file
--- urlgrabber-2.9.7.orig/.bzr/branch-name
+++ urlgrabber-2.9.7/.bzr/branch-name
@@ -0,0 +1 @@
+urlgrabber.debian
--- urlgrabber-2.9.7.orig/.bzr/inventory
+++ urlgrabber-2.9.7/.bzr/inventory
@@ -0,0 +1,37 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- urlgrabber-2.9.7.orig/.bzr/inventory.weave
+++ urlgrabber-2.9.7/.bzr/inventory.weave
@@ -0,0 +1,115 @@
+# bzr weave file v5
+i
+1 e7442ac34f623f81befad35a23206256ef273f87
+n wildfire@progsoc.org-20051228065225-163b81689521a533
+
+i 0
+1 8f4b7d91c5186771e2a108f62b365ffa63872598
+n wildfire@progsoc.org-20051228065328-c1083c82db1e7ec3
+
+i 0
+1 e6243a4d9ca9b7312bbc2d0cdf11ca395038e782
+n wildfire@progsoc.org-20051231042934-0a9043755ce47970
+
+i 2 1
+1 0e2e48ea6b140efbea1a3ce83532fc7164ebd31b
+n wildfire@progsoc.org-20051231043053-cf6e6cd84c74f297
+
+i 3
+1 8389f31e4d4aaf2bf85d6a45bbddc1fcfe99d0c6
+n wildfire@progsoc.org-20051231043436-2e0cbd0e13fec9b4
+
+i 4
+1 21b5a99c51cbf9ad01991af63b154db41fc4aed1
+n wildfire@progsoc.org-20051231072726-04b5b3354a60ed0e
+
+w
+{ 0
+.
+[ 1
+.
+] 1
+{ 1
+.
+}
+.
+.
+[ 1
+.
+] 1
+{ 1
+.
+}
+.
+[ 1
+.
+] 1
+{ 1
+.
+}
+{ 2
+.
+[ 4
+.
+] 4
+{ 4
+[ 5
+.
+}
+] 5
+{ 5
+.
+}
+.
+[ 5
+.
+] 5
+{ 5
+.
+}
+.
+.
+.
+.
+}
+.
+.
+.
+.
+.
+.
+.
+.
+.
+[ 1
+.
+.
+.
+] 1
+{ 1
+.
+.
+.
+}
+.
+.
+.
+[ 1
+.
+.
+.
+.
+.
+.
+] 1
+{ 1
+.
+.
+.
+.
+.
+.
+}
+.
+}
+W
--- urlgrabber-2.9.7.orig/.bzr/ancestry.weave
+++ urlgrabber-2.9.7/.bzr/ancestry.weave
@@ -0,0 +1,3 @@
+# bzr weave file v5
+w
+W
--- urlgrabber-2.9.7.orig/.bzr/stat-cache
+++ urlgrabber-2.9.7/.bzr/stat-cache
@@ -0,0 +1 @@
+### bzr hashcache v5
--- urlgrabber-2.9.7.orig/.bzr/parent
+++ urlgrabber-2.9.7/.bzr/parent
@@ -0,0 +1 @@
+/home/anand/Debian/python-urlgrabber/urlgrabber.tarball
--- urlgrabber-2.9.7.orig/debian/changelog
+++ urlgrabber-2.9.7/debian/changelog
@@ -0,0 +1,19 @@
+urlgrabber (2.9.7-2) unstable; urgency=low
+
+ * When I imported urlgrabber into bzr, I somehow lost a Build-Dep: on
+ python. Re-adding it so I can (Closes: #335340)
+
+ -- Anand Kumria Sat, 31 Dec 2005 15:34:22 +1100
+
+urlgrabber (2.9.7-1) unstable; urgency=low
+
+ * New upstream release (Closes: #344934)
+
+ -- Anand Kumria Sat, 31 Dec 2005 15:34:22 +1100
+
+urlgrabber (2.9.6-1) unstable; urgency=low
+
+ * Initial release (Closes: #312698)
+
+ -- Anand Kumria Sun, 9 Oct 2005 13:06:55 +1000
+
--- urlgrabber-2.9.7.orig/debian/compat
+++ urlgrabber-2.9.7/debian/compat
@@ -0,0 +1 @@
+4
--- urlgrabber-2.9.7.orig/debian/control
+++ urlgrabber-2.9.7/debian/control
@@ -0,0 +1,21 @@
+Source: urlgrabber
+Section: python
+Priority: optional
+Maintainer: Anand Kumria
+Build-Depends: debhelper (>= 4.0.0), python2.3-dev, python
+Standards-Version: 3.6.2
+
+Package: python-urlgrabber
+Architecture: any
+Depends: ${shlibs:Depends}, ${misc:Depends}, ${python:Depends}
+Description: A high-level cross-protocol url-grabber.
+ Python urlgrabber drastically simplifies the fetching of files. It is
+ designed to be used in programs that need common (but not necessarily
+ simple) url-fetching features.
+ .
+ It supports identical behavior for http://, ftp:// and file:/// URIs,
+ HTTP keepalive, byte ranges, regets, progress meters, throttling, retries,
+ access to authenticated http / ftp servers and proxies and the ability
+ to treat a list of mirrors as a single source automatically switching
+ mirrors if there is a failure.
+ .
--- urlgrabber-2.9.7.orig/debian/copyright
+++ urlgrabber-2.9.7/debian/copyright
@@ -0,0 +1,13 @@
+This package was debianized by Anand Kumria on
+Sun, 9 Oct 2005 13:06:55 +1000.
+
+It was downloaded from
+
+Copyright Holder:
+ Michael D. Stenner
+ Ryan Tomayko
+
+License:
+
+python-urlgrabber is released under the terms of the GNU Lesser General
+Licence (LGPL) a copy of which may be found in /usr/share/common-licenses/LGPL
--- urlgrabber-2.9.7.orig/debian/dirs
+++ urlgrabber-2.9.7/debian/dirs
@@ -0,0 +1,2 @@
+usr/bin
+usr/sbin
--- urlgrabber-2.9.7.orig/debian/docs
+++ urlgrabber-2.9.7/debian/docs
@@ -0,0 +1 @@
+TODO
--- urlgrabber-2.9.7.orig/debian/rules
+++ urlgrabber-2.9.7/debian/rules
@@ -0,0 +1,105 @@
+#!/usr/bin/make -f
+# -*- makefile -*-
+# Sample debian/rules that uses debhelper.
+# This file was originally written by Joey Hess and Craig Small.
+# As a special exception, when this file is copied by dh-make into a
+# dh-make output file, you may use that output file without restriction.
+# This special exception was added by Craig Small in version 0.37 of dh-make.
+
+# Uncomment this to turn on verbose mode.
+#export DH_VERBOSE=1
+
+
+
+
+CFLAGS = -Wall -g
+
+ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
+ CFLAGS += -O0
+else
+ CFLAGS += -O2
+endif
+
+configure: configure-stamp
+configure-stamp:
+ dh_testdir
+ # Add here commands to configure the package.
+
+ touch configure-stamp
+
+
+build: build-stamp
+
+build-stamp: configure-stamp
+ dh_testdir
+
+ # Add here commands to compile the package.
+ #$(MAKE)
+ python setup.py build
+ #docbook-to-man debian/urlgrabber.sgml > urlgrabber.1
+ python test/runtests.py
+
+ touch build-stamp
+
+clean:
+ dh_testdir
+ dh_testroot
+ rm -f build-stamp configure-stamp
+
+ # Add here commands to clean up after the build process.
+ #-$(MAKE) clean
+ python setup.py clean
+ find $(CURDIR) -name "*.pyc" -exec rm -f '{}' \;
+
+ dh_clean
+
+install: build
+	dh_testdir
+	dh_testroot
+	dh_clean -k
+	dh_installdirs
+
+	# Add here commands to install the package into debian/urlgrabber.
+	#$(MAKE) install DESTDIR=$(CURDIR)/debian/urlgrabber
+	python setup.py install --root=$(CURDIR)/debian/python-urlgrabber
+	# Drop the distutils-installed versioned doc dir; dh_installdocs ships docs.
+	rm -rf $(CURDIR)/debian/python-urlgrabber/usr/share/doc/urlgrabber-*
+
+
+# Build architecture-independent files here.
+binary-indep: build install
+# We have nothing to do by default.
+
+# Build architecture-dependent files here.
+binary-arch: build install
+ dh_testdir
+ dh_testroot
+ dh_installchangelogs ChangeLog
+ dh_installdocs
+ dh_installexamples
+# dh_install
+# dh_installmenu
+# dh_installdebconf
+# dh_installlogrotate
+# dh_installemacsen
+# dh_installpam
+# dh_installmime
+# dh_installinit
+# dh_installcron
+# dh_installinfo
+ dh_installman
+ dh_link
+ dh_strip
+ dh_compress
+ dh_fixperms
+# dh_perl
+ dh_python
+# dh_makeshlibs
+ dh_installdeb
+ dh_shlibdeps
+ dh_gencontrol
+ dh_md5sums
+ dh_builddeb
+
+binary: binary-indep binary-arch
+.PHONY: build clean binary-indep binary-arch binary install configure