From 2e7de1aa4a3c8af684d609b2d4179a4e81b64a0e Mon Sep 17 00:00:00 2001 From: Peter Portante Date: Fri, 12 Jul 2013 13:27:56 -0400 Subject: [PATCH 2/2] *** Mike Barton's suggested patch for bug-1196932 *** Change-Id: Id2fd83caad979bfd446d52a775a1fa373a670cf1 Signed-off-by: Peter Portante --- swift/obj/diskfile.py | 80 +++++++++++++++++++----------------------- swift/obj/server.py | 2 -- test/unit/__init__.py | 12 ++++--- test/unit/obj/test_diskfile.py | 50 +++++++++++++++++++------- test/unit/obj/test_server.py | 2 +- 5 files changed, 83 insertions(+), 63 deletions(-) diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py index 97845c1..28bc5fe 100644 --- a/swift/obj/diskfile.py +++ b/swift/obj/diskfile.py @@ -107,6 +107,41 @@ def quarantine_renamer(device_path, corrupted_file_path): return to_dir +def hash_cleanup_listdir(hsh_path, reclaim_age=ONE_WEEK): + """ + List contents of a hash directory and clean up any old files. + + :param hsh_path: object hash path + :param reclaim_age: age in seconds at which to remove tombstones + :returns: list of files remaining in the directory, reverse sorted + """ + files = os.listdir(hsh_path) + if len(files) == 1: + if files[0].endswith('.ts'): + # remove tombstones older than reclaim_age + ts = files[0].rsplit('.', 1)[0] + if (time.time() - float(ts)) > reclaim_age: + os.unlink(join(hsh_path, files[0])) + files.remove(files[0]) + elif files: + files.sort(reverse=True) + meta = data = tomb = None + for filename in list(files): + if not meta and filename.endswith('.meta'): + meta = filename + if not data and filename.endswith('.data'): + data = filename + if not tomb and filename.endswith('.ts'): + tomb = filename + if (filename < tomb or # any file older than tomb + filename < data or # any file older than data + (filename.endswith('.meta') and + filename < meta)): # old meta + os.unlink(join(hsh_path, filename)) + files.remove(filename) + return files + + def hash_suffix(path, reclaim_age): """ Performs reclamation and returns an md5 of all (remaining) files. @@ -125,7 +160,7 @@ def hash_suffix(path, reclaim_age): for hsh in path_contents: hsh_path = join(path, hsh) try: - files = os.listdir(hsh_path) + files = hash_cleanup_listdir(hsh_path, reclaim_age) except OSError, err: if err.errno == errno.ENOTDIR: partition_path = dirname(path) @@ -137,29 +172,6 @@ def hash_suffix(path, reclaim_age): (hsh_path, quar_path)) continue raise - if len(files) == 1: - if files[0].endswith('.ts'): - # remove tombstones older than reclaim_age - ts = files[0].rsplit('.', 1)[0] - if (time.time() - float(ts)) > reclaim_age: - os.unlink(join(hsh_path, files[0])) - files.remove(files[0]) - elif files: - files.sort(reverse=True) - meta = data = tomb = None - for filename in list(files): - if not meta and filename.endswith('.meta'): - meta = filename - if not data and filename.endswith('.data'): - data = filename - if not tomb and filename.endswith('.ts'): - tomb = filename - if (filename < tomb or # any file older than tomb - filename < data or # any file older than data - (filename.endswith('.meta') and - filename < meta)): # old meta - os.unlink(join(hsh_path, filename)) - files.remove(filename) if not files: os.rmdir(hsh_path) for filename in files: @@ -323,6 +335,7 @@ class DiskWriter(object): # other requests to reference. renamer(self.tmppath, join(self.disk_file.datadir, timestamp + extension)) + hash_cleanup_listdir(self.disk_file.datadir) self.threadpool.force_run_in_thread(finalize_put) self.disk_file.metadata = metadata @@ -576,25 +589,6 @@ class DiskFile(object): with self.writer() as writer: writer.put(metadata, extension=extension) - def unlinkold(self, timestamp): - """ - Remove any older versions of the object file. Any file that has an - older timestamp than timestamp will be deleted. - - :param timestamp: timestamp to compare with each file - """ - timestamp = normalize_timestamp(timestamp) - - def _unlinkold(): - for fname in os.listdir(self.datadir): - if fname < timestamp: - try: - os.unlink(join(self.datadir, fname)) - except OSError, err: # pragma: no cover - if err.errno != errno.ENOENT: - raise - self.threadpool.run_in_thread(_unlinkold) - def _drop_cache(self, fd, offset, length): """Method for no-oping buffer cache drop method.""" if not self.keep_cache: diff --git a/swift/obj/server.py b/swift/obj/server.py index 86637ba..2c15718 100644 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -412,7 +412,6 @@ class ObjectController(object): writer.put(metadata) except DiskFileNoSpace: return HTTPInsufficientStorage(drive=device, request=request) - disk_file.unlinkold(metadata['X-Timestamp']) if old_delete_at != new_delete_at: if new_delete_at: self.delete_at_update( @@ -585,7 +584,6 @@ class ObjectController(object): if orig_timestamp < req_timestamp: disk_file.put_metadata({'X-Timestamp': req_timestamp}, tombstone=True) - disk_file.unlinkold(req_timestamp) self.container_update( 'DELETE', account, container, obj, request, HeaderKeyDict({'x-timestamp': req_timestamp}), diff --git a/test/unit/__init__.py b/test/unit/__init__.py index e8bd43b..9fb6e5f 100644 --- a/test/unit/__init__.py +++ b/test/unit/__init__.py @@ -343,11 +343,13 @@ def mock(update): else: deletes.append((module, attr)) setattr(module, attr, value) - yield True - for module, attr, value in returns: - setattr(module, attr, value) - for module, attr in deletes: - delattr(module, attr) + try: + yield True + finally: + for module, attr, value in returns: + setattr(module, attr, value) + for module, attr in deletes: + delattr(module, attr) def fake_http_connect(*code_iter, **kwargs): diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index cb1cad2..18f0bf9 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -280,14 +280,13 @@ class TestDiskFileModuleMethods(unittest.TestCase): i[0] += 1 return 1 with unit_mock({'swift.obj.diskfile.getmtime': _getmtime}): - diskfile.getmtime("/tmp") hashed, hashes = diskfile.get_hashes( part, recalculate=[]) # getmtime will actually not get called. Initially, the pickle.load # will raise an exception first and later, force_rewrite will # short-circuit the if clause to determine whether to write out a # fresh hashes_file. - self.assertEquals(i[0], 1) + self.assertEquals(i[0], 0) self.assertTrue('a83' in hashes) def test_get_hashes_modified(self): @@ -312,6 +311,42 @@ class TestDiskFileModuleMethods(unittest.TestCase): part, recalculate=['a83']) self.assertEquals(i[0], 3) + def test_hash_cleanup_listdir(self): + file_list = [] + def mock_listdir(path): + return list(file_list) + def mock_unlink(path): + file_list.remove(os.path.basename(path)) + with unit_mock({'os.listdir': mock_listdir, 'os.unlink': mock_unlink}): + # purge .data if there's a newer .ts + file1 = normalize_timestamp(time()) + '.data' + file2 = normalize_timestamp(time() + 1) + '.ts' + file_list = [file1, file2] + self.assertEquals(diskfile.hash_cleanup_listdir('/whatever'), + [file2]) + + # purge .ts if there's a newer .data + file1 = normalize_timestamp(time()) + '.ts' + file2 = normalize_timestamp(time() + 1) + '.data' + file_list = [file1, file2] + self.assertEquals(diskfile.hash_cleanup_listdir('/whatever'), + [file2]) + + # keep .meta and .data if meta newer than data + file1 = normalize_timestamp(time()) + '.ts' + file2 = normalize_timestamp(time() + 1) + '.data' + file3 = normalize_timestamp(time() + 2) + '.meta' + file_list = [file1, file2, file3] + self.assertEquals(diskfile.hash_cleanup_listdir('/whatever'), + [file3, file2]) + + # keep only latest of multiple .ts files + file1 = normalize_timestamp(time()) + '.ts' + file2 = normalize_timestamp(time() + 1) + '.ts' + file3 = normalize_timestamp(time() + 2) + '.ts' + file_list = [file1, file2, file3] + self.assertEquals(diskfile.hash_cleanup_listdir('/whatever'), + [file3]) class TestDiskFile(unittest.TestCase): """Test swift.obj.diskfile.DiskFile""" @@ -631,18 +666,9 @@ class TestDiskFile(unittest.TestCase): df.put_metadata(metadata, tombstone=True) exp_name = '%s.ts' % str(normalize_timestamp(ts)) dl = os.listdir(df.datadir) - self.assertEquals(len(dl), 2) + self.assertEquals(len(dl), 1) self.assertTrue(exp_name in set(dl)) - def test_unlinkold(self): - df1 = self._get_disk_file() - future_time = str(normalize_timestamp(time() + 100)) - self._get_disk_file(ts=future_time) - self.assertEquals(len(os.listdir(df1.datadir)), 2) - df1.unlinkold(future_time) - self.assertEquals(len(os.listdir(df1.datadir)), 1) - self.assertEquals(os.listdir(df1.datadir)[0], "%s.data" % future_time) - def test_close_error(self): def err(): diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py index 661f84e..8d18daa 100755 --- a/test/unit/obj/test_server.py +++ b/test/unit/obj/test_server.py @@ -60,7 +60,7 @@ class TestObjectController(unittest.TestCase): def tearDown(self): """ Tear down for testing swift.object_server.ObjectController """ rmtree(os.path.dirname(self.testdir)) - tpool.execute = self._orig_tpool_exc + tpool.execute = self._orig_tpool_exc def test_REQUEST_SPECIAL_CHARS(self): obj = 'special昆%20/%' -- 1.8.1.4