Comment 4 for bug 890085

Revision history for this message
Martin Pool (mbp) wrote :

Earlier on, we're reading the whole file to be added in to memory, so it's hard for the compressor to deal with it as chunks.

The structure here were the compressed data is held attached to an object is perhaps making it a bit more likely it will be unnecessarily retained.

The traceback where it's added is

-> if expected_sha is not None:
(Pdb) bt
  /home/mbp/bzr/work/bzr(145)<module>()
-> exit_val = bzrlib.commands.main()
  /home/mbp/bzr/work/bzrlib/commands.py(1213)main()
-> ret = run_bzr_catch_errors(argv)
  /home/mbp/bzr/work/bzrlib/commands.py(1226)run_bzr_catch_errors()
-> return exception_to_return_code(run_bzr, argv)
  /home/mbp/bzr/work/bzrlib/commands.py(923)exception_to_return_code()
-> return the_callable(*args, **kwargs)
  /home/mbp/bzr/work/bzrlib/commands.py(1128)run_bzr()
-> ret = run(*run_argv)
  /home/mbp/bzr/work/bzrlib/commands.py(676)run_argv_aliases()
-> return self.run(**all_cmd_args)
  /home/mbp/bzr/work/bzrlib/commands.py(698)run()
-> return self._operation.run_simple(*args, **kwargs)
  /home/mbp/bzr/work/bzrlib/cleanup.py(135)run_simple()
-> self.cleanups, self.func, *args, **kwargs)
  /home/mbp/bzr/work/bzrlib/cleanup.py(165)_do_with_cleanups()
-> result = func(*args, **kwargs)
  /home/mbp/bzr/work/bzrlib/builtins.py(3497)run()
-> lossy=lossy)
  /home/mbp/bzr/work/bzrlib/decorators.py(217)write_locked()
-> result = unbound(self, *args, **kwargs)
  /home/mbp/bzr/work/bzrlib/workingtree_4.py(208)commit()
-> result = WorkingTree.commit(self, message, revprops, *args, **kwargs)
  /home/mbp/bzr/work/bzrlib/decorators.py(217)write_locked()
-> result = unbound(self, *args, **kwargs)
  /home/mbp/bzr/work/bzrlib/mutabletree.py(210)commit()
-> *args, **kwargs)
  /home/mbp/bzr/work/bzrlib/commit.py(289)commit()
-> lossy=lossy)
  /home/mbp/bzr/work/bzrlib/cleanup.py(131)run()
-> self.cleanups, self.func, self, *args, **kwargs)
  /home/mbp/bzr/work/bzrlib/cleanup.py(165)_do_with_cleanups()
-> result = func(*args, **kwargs)
  /home/mbp/bzr/work/bzrlib/commit.py(431)_commit()
-> self._update_builder_with_changes()
  /home/mbp/bzr/work/bzrlib/commit.py(691)_update_builder_with_changes()
-> self.work_tree, self.basis_revid, iter_changes):
  /home/mbp/bzr/work/bzrlib/vf_repository.py(760)record_iter_changes()
-> file_id, text, heads, nostore_sha)
  /home/mbp/bzr/work/bzrlib/vf_repository.py(829)_add_text_to_weave()
-> nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
  /home/mbp/bzr/work/bzrlib/groupcompress.py(1335)_add_text()
-> nostore_sha=nostore_sha))[0]
  /home/mbp/bzr/work/bzrlib/groupcompress.py(1835)_insert_record_stream()
-> nostore_sha=nostore_sha)
> /home/mbp/bzr/work/bzrlib/groupcompress.py(866)compress()
-> if expected_sha is not None:

With my branch so far in place, and giving it a bit more memory, the place it bombs out is

(Pdb) bt
  /home/mbp/bzr/work/bzr(145)<module>()
-> exit_val = bzrlib.commands.main()
  /home/mbp/bzr/work/bzrlib/commands.py(1213)main()
-> ret = run_bzr_catch_errors(argv)
  /home/mbp/bzr/work/bzrlib/commands.py(1226)run_bzr_catch_errors()
-> return exception_to_return_code(run_bzr, argv)
  /home/mbp/bzr/work/bzrlib/commands.py(923)exception_to_return_code()
-> return the_callable(*args, **kwargs)
  /home/mbp/bzr/work/bzrlib/commands.py(1128)run_bzr()
-> ret = run(*run_argv)
  /home/mbp/bzr/work/bzrlib/commands.py(676)run_argv_aliases()
-> return self.run(**all_cmd_args)
  /home/mbp/bzr/work/bzrlib/commands.py(698)run()
-> return self._operation.run_simple(*args, **kwargs)
  /home/mbp/bzr/work/bzrlib/cleanup.py(135)run_simple()
-> self.cleanups, self.func, *args, **kwargs)
  /home/mbp/bzr/work/bzrlib/cleanup.py(165)_do_with_cleanups()
-> result = func(*args, **kwargs)
  /home/mbp/bzr/work/bzrlib/builtins.py(3497)run()
-> lossy=lossy)
  /home/mbp/bzr/work/bzrlib/decorators.py(217)write_locked()
-> result = unbound(self, *args, **kwargs)
  /home/mbp/bzr/work/bzrlib/workingtree_4.py(208)commit()
-> result = WorkingTree.commit(self, message, revprops, *args, **kwargs)
  /home/mbp/bzr/work/bzrlib/decorators.py(217)write_locked()
-> result = unbound(self, *args, **kwargs)
  /home/mbp/bzr/work/bzrlib/mutabletree.py(210)commit()
-> *args, **kwargs)
  /home/mbp/bzr/work/bzrlib/commit.py(289)commit()
-> lossy=lossy)
  /home/mbp/bzr/work/bzrlib/cleanup.py(131)run()
-> self.cleanups, self.func, self, *args, **kwargs)
  /home/mbp/bzr/work/bzrlib/cleanup.py(165)_do_with_cleanups()
-> result = func(*args, **kwargs)
  /home/mbp/bzr/work/bzrlib/commit.py(431)_commit()
-> self._update_builder_with_changes()
  /home/mbp/bzr/work/bzrlib/commit.py(691)_update_builder_with_changes()
-> self.work_tree, self.basis_revid, iter_changes):
  /home/mbp/bzr/work/bzrlib/vf_repository.py(760)record_iter_changes()
-> file_id, text, heads, nostore_sha)
  /home/mbp/bzr/work/bzrlib/vf_repository.py(829)_add_text_to_weave()
-> nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
  /home/mbp/bzr/work/bzrlib/groupcompress.py(1335)_add_text()
-> nostore_sha=nostore_sha))[0]
  /home/mbp/bzr/work/bzrlib/groupcompress.py(1872)_insert_record_stream()
-> flush()
> /home/mbp/bzr/work/bzrlib/groupcompress.py(1747)flush()
-> bytes = ''.join(chunks)

So _DirectPackAccess._access.add_raw_records takes one big byte string apparently, and that needs to be fixed.

The to_chunks method is building a list of which one element is the bigstring chunks, so that might need to be fixed too.

add_raw_records rather bizarrely takes one big bytestring and a list of substring lengths and then chops it up again.

From the groupcompress repo, add_raw_records seems to be only ever called with one byte string, so it's a bit redundant as a layer. It calls in to ContainerWriter.add_bytes_record, which also needs one single string but could be fairly easily adapted to handle more. That in turn goes to ContainerSerializer which builds another big string and could easily