David, Here are my results with a hybrid JSON/struct approach included: https://gist.github.com/859ba4995a3df9f45913#file_report_2.7.markdown
As you predicted, the JSON/struct hybrid performs extremely well. Here's the code in case you're having trouble viewing the gist:
def json_hybrid_serialize_ring(ring, filename):
    """Write *ring* to *filename* as a gzipped JSON/struct hybrid.

    Layout of the uncompressed stream::

        <json_len><json_text><replica_count>
        [<part_count><part_size><part_data>...]

    * ``json_len``      -- ``!I``, byte length of ``json_text``.
    * ``json_text``     -- the JSON list ``[part_shift, devs]``.
    * ``replica_count`` -- ``!H``, number of replica rows that follow.
    * per replica row   -- ``part_count`` and ``part_size`` (both ``!I``)
      followed by ``part_count * part_size`` bytes of raw array data.

    NOTE: the row payload is the array's raw in-memory representation, so
    it is written in the byte order of the writing machine, unlike the
    network-order headers.

    :param ring: mapping with the keys ``'part_shift'``, ``'devs'`` and
        ``'replica2part2dev_id'``. Every element of the latter must expose
        ``itemsize`` and ``tobytes()`` (or the older ``tostring()``);
        presumably these are ``array.array`` instances -- confirm against
        callers.
    :param filename: path of the gzip file to create or overwrite.
    :raises struct.error: if a length does not fit its header field.
    """
    # Serialize the JSON header before touching the file so that a failure
    # here does not leave a truncated ring file behind.
    json_text = json.dumps([ring['part_shift'], ring['devs']])
    if not isinstance(json_text, bytes):
        # Python 3: json.dumps returns text, the stream needs bytes.
        json_text = json_text.encode('utf-8')

    replica2part2dev_id = ring['replica2part2dev_id']

    # The context manager closes the gzip stream even when a write fails.
    with GzipFile(filename, 'wb', compresslevel=GZ_LEVEL) as gz:
        gz.write(struct.pack('!I', len(json_text)))
        gz.write(json_text)
        gz.write(struct.pack('!H', len(replica2part2dev_id)))
        for part2dev_id in replica2part2dev_id:
            # array.tostring() was removed in Python 3.9; prefer tobytes()
            # and fall back for interpreters that only have tostring().
            to_bytes = (getattr(part2dev_id, 'tobytes', None) or
                        part2dev_id.tostring)
            gz.write(struct.pack(
                '!II', len(part2dev_id), part2dev_id.itemsize))
            # Written separately from the header: same bytes on disk, but
            # avoids copying the whole row through struct.pack.
            gz.write(to_bytes())
def _typecode_for_itemsize(itemsize):
    """Return the unsigned ``array`` typecode whose items are *itemsize* bytes."""
    for typecode in 'BHILQ':
        try:
            if array.array(typecode).itemsize == itemsize:
                return typecode
        except ValueError:
            # Typecode not supported by this interpreter; try the next one.
            continue
    raise ValueError('unsupported part_size in ring file: %r' % (itemsize,))


def json_hybrid_deserialize_ring(filename):
    """Read a ring written in the gzipped JSON/struct hybrid format.

    Expected layout of the uncompressed stream::

        <json_len><json_text><replica_count>
        [<part_count><part_size><part_data>...]

    ``json_len`` is ``!I``, ``replica_count`` is ``!H`` and each replica row
    starts with ``part_count`` and ``part_size`` (both ``!I``) followed by
    ``part_count * part_size`` bytes of raw array data.

    NOTE: the row payload is loaded as-is into an ``array.array``, i.e. it
    is interpreted in the byte order of the reading machine.

    :param filename: path of the gzip file to read.
    :returns: dict with the keys ``'part_shift'``, ``'devs'`` and
        ``'replica2part2dev_id'`` (a list of ``array.array``, one per
        replica row).
    :raises struct.error: if a fixed-size header is truncated.
    :raises ValueError: if a row's ``part_size`` matches no unsigned array
        typecode, or its payload length is not a multiple of ``part_size``.
    """
    ring_dict = {
        'replica2part2dev_id': [],
    }
    # The context manager makes sure the file handle is released, including
    # on malformed input.
    with GzipFile(filename) as gz:
        json_len, = struct.unpack('!I', gz.read(4))
        # Decode explicitly so json.loads gets text on every Python version.
        ring_dict['part_shift'], ring_dict['devs'] = json.loads(
            gz.read(json_len).decode('utf-8'))
        replica_count, = struct.unpack('!H', gz.read(2))
        for _ in range(replica_count):
            part_count, part_size = struct.unpack('!II', gz.read(8))
            # Honor the item size stored in the file instead of assuming
            # 2-byte items; a size of 2 still maps to 'H'.
            ring_dict['replica2part2dev_id'].append(array.array(
                _typecode_for_itemsize(part_size),
                gz.read(part_count * part_size)))
    return ring_dict
The on-disk structure is: <json_len><json_text><replica_count>[<part_count><part_size><part_data>...]
David,
Here are my results with a hybrid JSON/struct approach included: https://gist.github.com/859ba4995a3df9f45913#file_report_2.7.markdown
As you predicted, the JSON/struct hybrid performs extremely well. Here's the code in case you're having trouble viewing the gist:
def json_hybrid_serialize_ring(ring, filename):
    """Write *ring* to *filename* as a gzipped JSON/struct hybrid.

    Layout of the uncompressed stream::

        <json_len><json_text><replica_count>
        [<part_count><part_size><part_data>...]

    * ``json_len``      -- ``!I``, byte length of ``json_text``.
    * ``json_text``     -- the JSON list ``[part_shift, devs]``.
    * ``replica_count`` -- ``!H``, number of replica rows that follow.
    * per replica row   -- ``part_count`` and ``part_size`` (both ``!I``)
      followed by ``part_count * part_size`` bytes of raw array data.

    NOTE: the row payload is the array's raw in-memory representation, so
    it is written in the byte order of the writing machine, unlike the
    network-order headers.

    :param ring: mapping with the keys ``'part_shift'``, ``'devs'`` and
        ``'replica2part2dev_id'``. Every element of the latter must expose
        ``itemsize`` and ``tobytes()`` (or the older ``tostring()``);
        presumably these are ``array.array`` instances -- confirm against
        callers.
    :param filename: path of the gzip file to create or overwrite.
    :raises struct.error: if a length does not fit its header field.
    """
    # Serialize the JSON header before touching the file so that a failure
    # here does not leave a truncated ring file behind.
    json_text = json.dumps([ring['part_shift'], ring['devs']])
    if not isinstance(json_text, bytes):
        # Python 3: json.dumps returns text, the stream needs bytes.
        json_text = json_text.encode('utf-8')

    replica2part2dev_id = ring['replica2part2dev_id']

    # The context manager closes the gzip stream even when a write fails.
    with GzipFile(filename, 'wb', compresslevel=GZ_LEVEL) as gz:
        gz.write(struct.pack('!I', len(json_text)))
        gz.write(json_text)
        gz.write(struct.pack('!H', len(replica2part2dev_id)))
        for part2dev_id in replica2part2dev_id:
            # array.tostring() was removed in Python 3.9; prefer tobytes()
            # and fall back for interpreters that only have tostring().
            to_bytes = (getattr(part2dev_id, 'tobytes', None) or
                        part2dev_id.tostring)
            gz.write(struct.pack(
                '!II', len(part2dev_id), part2dev_id.itemsize))
            # Written separately from the header: same bytes on disk, but
            # avoids copying the whole row through struct.pack.
            gz.write(to_bytes())
def _typecode_for_itemsize(itemsize):
    """Return the unsigned ``array`` typecode whose items are *itemsize* bytes."""
    for typecode in 'BHILQ':
        try:
            if array.array(typecode).itemsize == itemsize:
                return typecode
        except ValueError:
            # Typecode not supported by this interpreter; try the next one.
            continue
    raise ValueError('unsupported part_size in ring file: %r' % (itemsize,))


def json_hybrid_deserialize_ring(filename):
    """Read a ring written in the gzipped JSON/struct hybrid format.

    Expected layout of the uncompressed stream::

        <json_len><json_text><replica_count>
        [<part_count><part_size><part_data>...]

    ``json_len`` is ``!I``, ``replica_count`` is ``!H`` and each replica row
    starts with ``part_count`` and ``part_size`` (both ``!I``) followed by
    ``part_count * part_size`` bytes of raw array data.

    NOTE: the row payload is loaded as-is into an ``array.array``, i.e. it
    is interpreted in the byte order of the reading machine.

    :param filename: path of the gzip file to read.
    :returns: dict with the keys ``'part_shift'``, ``'devs'`` and
        ``'replica2part2dev_id'`` (a list of ``array.array``, one per
        replica row).
    :raises struct.error: if a fixed-size header is truncated.
    :raises ValueError: if a row's ``part_size`` matches no unsigned array
        typecode, or its payload length is not a multiple of ``part_size``.
    """
    ring_dict = {
        'replica2part2dev_id': [],
    }
    # The context manager makes sure the file handle is released, including
    # on malformed input.
    with GzipFile(filename) as gz:
        json_len, = struct.unpack('!I', gz.read(4))
        # Decode explicitly so json.loads gets text on every Python version.
        ring_dict['part_shift'], ring_dict['devs'] = json.loads(
            gz.read(json_len).decode('utf-8'))
        replica_count, = struct.unpack('!H', gz.read(2))
        for _ in range(replica_count):
            part_count, part_size = struct.unpack('!II', gz.read(8))
            # Honor the item size stored in the file instead of assuming
            # 2-byte items; a size of 2 still maps to 'H'.
            ring_dict['replica2part2dev_id'].append(array.array(
                _typecode_for_itemsize(part_size),
                gz.read(part_count * part_size)))
    return ring_dict
The on-disk structure is: <json_len><json_text><replica_count>[<part_count><part_size><part_data>...]