From baf604498608909dec8944275d8410b7bf68ae4c Mon Sep 17 00:00:00 2001 From: Vincent Untz Date: Thu, 21 Jun 2012 14:37:41 +0200 Subject: [PATCH] Do not use pickle for serialization in memcache, but JSON We don't want to use pickle as it can execute arbitrary code. JSON is safe. However, note that it supports serialization for only some specific subset of object types; this should be enough for what we need, though. By default, json.load() will convert strings to unicode objects. We likely prefer to have str objects, so a specific JSON decoder is used to automatically convert to str. Part of bug 1006414. Change-Id: Id7d6d547b103b4f23ebf5be98b88f09ec6027ce4 --- swift/common/memcached.py | 59 +++++++++++++++++++++++++++-------- test/unit/common/test_memcached.py | 3 ++ 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/swift/common/memcached.py b/swift/common/memcached.py index ecd9332..4e4602a 100644 --- a/swift/common/memcached.py +++ b/swift/common/memcached.py @@ -20,7 +20,7 @@ version is at: http://github.com/memcached/memcached/blob/1.4.2/doc/protocol.txt """ -import cPickle as pickle +import json import logging import socket import time @@ -31,9 +31,10 @@ DEFAULT_MEMCACHED_PORT = 11211 CONN_TIMEOUT = 0.3 IO_TIMEOUT = 2.0 +# keeping PICKLE_FLAG to remember we had it PICKLE_FLAG = 1 +JSON_FLAG = 2 NODE_WEIGHT = 50 -PICKLE_PROTOCOL = 2 TRY_COUNT = 3 # if ERROR_LIMIT_COUNT errors occur in ERROR_LIMIT_TIME seconds, the server @@ -47,6 +48,34 @@ def md5hash(key): return md5(key).hexdigest() +def str_scanstring(*args, **kwargs): + """ + Uses scanstring from json, and then convert from unicode to a utf8-encoded + str. + """ + result = json.decoder.scanstring(*args, **kwargs) + return result[0].encode('utf8'), result[1] + + +class JSONDecoderStr(json.JSONDecoder): + """ + A JSON decoder that will convert JSON strings to utf8-encoded str, instead + of unicode. + """ + def __init__(self, encoding=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True, + object_pairs_hook=None): + super(JSONDecoderStr, self).__init__(encoding=encoding, + object_hook=object_hook, + parse_float=parse_float, + parse_int=parse_int, + parse_constant=parse_constant, + strict=strict, + object_pairs_hook=object_pairs_hook) + self.parse_string = str_scanstring + self.scan_once = json.scanner.py_make_scanner(self) + + class MemcacheConnectionError(Exception): pass @@ -130,7 +159,8 @@ class MemcacheRing(object): :param key: key :param value: value - :param serialize: if True, value is pickled before sending to memcache + :param serialize: if True, value is serialized with JSON before sending + to memcache :param timeout: ttl in memcache """ key = md5hash(key) @@ -138,8 +168,8 @@ class MemcacheRing(object): timeout += time.time() flags = 0 if serialize: - value = pickle.dumps(value, PICKLE_PROTOCOL) - flags |= PICKLE_FLAG + value = json.dumps(value) + flags |= JSON_FLAG for (server, fp, sock) in self._get_conns(key): try: sock.sendall('set %s %d %d %s noreply\r\n%s\r\n' % \ @@ -151,8 +181,8 @@ class MemcacheRing(object): def get(self, key): """ - Gets the object specified by key. It will also unpickle the object - before returning if it is pickled in memcache. + Gets the object specified by key. It will also unserialize the object + before returning if it is serialized with JSON in memcache. :param key: key :returns: value of the key in memcache @@ -167,8 +197,8 @@ class MemcacheRing(object): if line[0].upper() == 'VALUE' and line[1] == key: size = int(line[3]) value = fp.read(size) - if int(line[2]) & PICKLE_FLAG: - value = pickle.loads(value) + if int(line[2]) & JSON_FLAG: + value = json.loads(value, cls=JSONDecoderStr) fp.readline() line = fp.readline().strip().split() self._return_conn(server, fp, sock) @@ -258,7 +288,8 @@ class MemcacheRing(object): :param mapping: dictonary of keys and values to be set in memcache :param servery_key: key to use in determining which server in the ring is used - :param serialize: if True, value is pickled before sending to memcache + :param serialize: if True, value is serialized with JSON before sending + to memcache :param timeout: ttl for memcache """ server_key = md5hash(server_key) @@ -269,8 +300,8 @@ class MemcacheRing(object): key = md5hash(key) flags = 0 if serialize: - value = pickle.dumps(value, PICKLE_PROTOCOL) - flags |= PICKLE_FLAG + value = json.dumps(value) + flags |= JSON_FLAG msg += ('set %s %d %d %s noreply\r\n%s\r\n' % (key, flags, timeout, len(value), value)) for (server, fp, sock) in self._get_conns(server_key): @@ -301,8 +332,8 @@ class MemcacheRing(object): if line[0].upper() == 'VALUE': size = int(line[3]) value = fp.read(size) - if int(line[2]) & PICKLE_FLAG: - value = pickle.loads(value) + if int(line[2]) & JSON_FLAG: + value = json.loads(value, cls=JSONDecoderStr) responses[line[1]] = value fp.readline() line = fp.readline().strip().split() diff --git a/test/unit/common/test_memcached.py b/test/unit/common/test_memcached.py index 029a92c..6b6b152 100644 --- a/test/unit/common/test_memcached.py +++ b/test/unit/common/test_memcached.py @@ -1,3 +1,4 @@ + # -*- coding: utf8 -*- # Copyright (c) 2010-2012 OpenStack, LLC. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -161,6 +162,8 @@ class TestMemcached(unittest.TestCase): self.assertEquals(memcache_client.get('some_key'), [1, 2, 3]) memcache_client.set('some_key', [4, 5, 6]) self.assertEquals(memcache_client.get('some_key'), [4, 5, 6]) + memcache_client.set('some_key', ['simple str', 'utf8 str éà']) + self.assertEquals(memcache_client.get('some_key'), ['simple str', 'utf8 str éà']) self.assert_(float(mock.cache.values()[0][1]) == 0) esttimeout = time.time() + 10 memcache_client.set('some_key', [1, 2, 3], timeout=10) -- 1.7.7