Valgrind error: write of uninitialised bytes in xt_flush_indices()

Bug #534361 reported by Kristian Nielsen
6
This bug affects 1 person
Affects Status Importance Assigned to Milestone
PBXT
Fix Committed
Undecided
Vladimir Kolesnikov

Bug Description

I'm trying to enable the PBXT test suite by default in MariaDB, however for
that I need it to run without failures also in the --valgrind mode.

I get the following Valgrind error in pbxt.auto_increment:

==1562== Syscall param pwrite64(buf) points to uninitialised byte(s)
==1562== at 0x504E188: (within /usr/lib/debug/libpthread-2.7.so)
==1562== by 0xA58564: xt_pwrite_file(XTOpenFile*, long, unsigned long, void*, XTIOStats*, XTThread*) (filesys_xt.cc:846)
==1562== by 0x9F60A7: XTIndexLog::il_flush(XTOpenTable*) (index_xt.cc:4387)
==1562== by 0x9F7B5E: xt_flush_indices(XTOpenTable*, long*, int) (index_xt.cc:4008)
==1562== by 0xA1BCD1: xt_flush_table(XTThread*, XTOpenTable*) (table_xt.cc:2343)
==1562== by 0xA1C179: xt_sync_flush_table(XTThread*, XTOpenTable*) (table_xt.cc:2193)
==1562== by 0xA4591A: db_lock_table_pool(XTThread*, XTDatabase*, unsigned, int, int) (database_xt.cc:844)
==1562== by 0xA45D67: xt_db_lock_table_pool_by_name(XTThread*, XTDatabase*, XTPathStr*, int, int, int, int, XTTable**) (database_xt.cc:915)
==1562== by 0xA1D291: tab_lock_table(XTThread*, XTPathStr*, int, int, int, XTTable**) (table_xt.cc:1257)
==1562== by 0xA1EFA0: xt_drop_table(XTThread*, XTPathStr*, int) (table_xt.cc:1625)
==1562== by 0x9F3A45: ha_pbxt::delete_table(char const*) (ha_pbxt.cc:5121)
==1562== by 0x7C4E10: handler::ha_delete_table(char const*) (handler.cc:3367)
==1562== by 0x7CAD18: ha_delete_table(THD*, handlerton*, char const*, char const*, char const*, bool) (handler.cc:1987)
==1562== by 0x7F2BB8: mysql_rm_table_part2(THD*, TABLE_LIST*, bool, bool, bool, bool) (sql_table.cc:2046)
==1562== by 0x7F31CE: mysql_rm_table(THD*, TABLE_LIST*, char, char) (sql_table.cc:1818)
==1562== by 0x68D8D3: mysql_execute_command(THD*) (sql_parse.cc:3446)
==1562== Address 0x10d2e2ae is 22 bytes inside a block of size 1,049,088 alloc'd
==1562== at 0x4C22FAB: malloc (vg_replace_malloc.c:207)
==1562== by 0x9FFB4A: xt_malloc_ns(unsigned long) (memory_xt.cc:155)
==1562== by 0x9F68EB: XTIndexLogPool::ilp_open_log(XTIndexLog**, unsigned, int, XTThread*) (index_xt.cc:4199)
==1562== by 0x9F6B3D: XTIndexLogPool::ilp_get_log(XTIndexLog**, XTThread*) (index_xt.cc:4242)
==1562== by 0x9F71FD: xt_flush_indices(XTOpenTable*, long*, int) (index_xt.cc:3815)
==1562== by 0xA1BCD1: xt_flush_table(XTThread*, XTOpenTable*) (table_xt.cc:2343)
==1562== by 0xA1C179: xt_sync_flush_table(XTThread*, XTOpenTable*) (table_xt.cc:2193)
==1562== by 0xA4591A: db_lock_table_pool(XTThread*, XTDatabase*, unsigned, int, int) (database_xt.cc:844)
==1562== by 0xA45D67: xt_db_lock_table_pool_by_name(XTThread*, XTDatabase*, XTPathStr*, int, int, int, int, XTTable**) (database_xt.cc:915)
==1562== by 0xA1D291: tab_lock_table(XTThread*, XTPathStr*, int, int, int, XTTable**) (table_xt.cc:1257)
==1562== by 0xA1EFA0: xt_drop_table(XTThread*, XTPathStr*, int) (table_xt.cc:1625)
==1562== by 0x9F3A45: ha_pbxt::delete_table(char const*) (ha_pbxt.cc:5121)
==1562== by 0x7C4E10: handler::ha_delete_table(char const*) (handler.cc:3367)
==1562== by 0x7CAD18: ha_delete_table(THD*, handlerton*, char const*, char const*, char const*, bool) (handler.cc:1987)
==1562== by 0x7F2BB8: mysql_rm_table_part2(THD*, TABLE_LIST*, bool, bool, bool, bool) (sql_table.cc:2046)
==1562== by 0x7F31CE: mysql_rm_table(THD*, TABLE_LIST*, char, char) (sql_table.cc:1818)

I tried to track it down a bit further with the following patch (debug only):

Unable to load plugin 'gtk'. It requested API version (1, 17, 0) of module <module 'bzrlib' from '/usr/lib/python2.5/site-packages/bzrlib/__init__.pyc'> but the minimum exported version is (2, 1, 0), and the maximum is (2, 1, 0)
Unable to load plugin 'gtk'. It requested API version (1, 17, 0) of module <module 'bzrlib' from '/usr/lib/python2.5/site-packages/bzrlib/__init__.pyc'> but the minimum exported version is (2, 1, 0), and the maximum is (2, 1, 0)
=== modified file 'storage/pbxt/src/index_xt.cc'
--- storage/pbxt/src/index_xt.cc 2009-11-24 10:55:06 +0000
+++ storage/pbxt/src/index_xt.cc 2010-03-08 10:32:27 +0000
@@ -51,6 +51,8 @@
 #include "trace_xt.h"
 #include "table_xt.h"

+#include <valgrind/memcheck.h>
+
 #ifdef DEBUG
 #define MAX_SEARCH_DEPTH 32
 //#define CHECK_AND_PRINT
@@ -3751,6 +3753,9 @@ static xtBool ind_add_to_dirty_list(XTIn
  register u_int i;
  register u_int guess;

+ u_int block_len = XT_GET_INDEX_BLOCK_LEN(XT_GET_DISK_2(((XTIdxBranchDPtr) block->cb_data)->tb_size_2));
+ if (VALGRIND_CHECK_MEM_IS_DEFINED(block->cb_data, block_len)) fprintf(stderr, "block->cb_data=%p block_len=%u\n", block->cb_data, block_len);
+
  if (*flush_count == IND_FLUSH_BUFFER_SIZE) {
   if (!idx_flush_dirty_list(il, ot, flush_count, flush_list))
    return FAILED;
@@ -4308,6 +4313,7 @@ xtBool XTIndexLog::il_write_byte(struct
 {
  if (!il_require_space(1, ot->ot_thread))
   return FAILED;
+ VALGRIND_CHECK_VALUE_IS_DEFINED(byte);
  *(il_buffer + il_buffer_len) = byte;
  il_buffer_len++;
  return OK;
@@ -4320,6 +4326,7 @@ xtBool XTIndexLog::il_write_word4(struct
  if (!il_require_space(4, ot->ot_thread))
   return FAILED;
  buffer = il_buffer + il_buffer_len;
+ VALGRIND_CHECK_VALUE_IS_DEFINED(value);
  XT_SET_DISK_4(buffer, value);
  il_buffer_len += 4;
  return OK;
@@ -4344,7 +4351,9 @@ xtBool XTIndexLog::il_write_block(struct
  page_data = (XTIndPageDataDPtr) (il_buffer + il_buffer_len);
  TRACK_BLOCK_TO_FLUSH(node_id);
  page_data->ild_data_type = XT_DT_INDEX_PAGE;
+ VALGRIND_CHECK_VALUE_IS_DEFINED(XT_NODE_ID(node_id));
  XT_SET_DISK_4(page_data->ild_page_id_4, XT_NODE_ID(node_id));
+ if (VALGRIND_CHECK_MEM_IS_DEFINED(block->cb_data, block_len)) fprintf(stderr, "block->cb_data=%p\n", block->cb_data);
  memcpy(page_data->ild_data, block->cb_data, block_len);

  il_buffer_len += offsetof(XTIndPageDataDRec, ild_data) + block_len;

This shows that a block is at some point added to the dirty list with
partially undefined data:

==604== Uninitialised byte(s) found during client check request
==604== at 0x9F6FD3: ind_add_to_dirty_list(XTIndexLog*, XTOpenTable*, unsigned*, XTIndBlock**, XTIndBlock*) (index_xt.cc:3757)
==604== by 0x9F7476: xt_flush_indices(XTOpenTable*, long*, int) (index_xt.cc:3858)
==604== by 0xA1BCD1: xt_flush_table(XTThread*, XTOpenTable*) (table_xt.cc:2343)
==604== by 0xA1C179: xt_sync_flush_table(XTThread*, XTOpenTable*) (table_xt.cc:2193)
==604== by 0xA4591A: db_lock_table_pool(XTThread*, XTDatabase*, unsigned, int, int) (database_xt.cc:844)
==604== by 0xA45D67: xt_db_lock_table_pool_by_name(XTThread*, XTDatabase*, XTPathStr*, int, int, int, int, XTTable**) (database_xt.cc:915)
==604== by 0xA1D291: tab_lock_table(XTThread*, XTPathStr*, int, int, int, XTTable**) (table_xt.cc:1257)
==604== by 0xA1EFA0: xt_drop_table(XTThread*, XTPathStr*, int) (table_xt.cc:1625)
==604== by 0x9F3A45: ha_pbxt::delete_table(char const*) (ha_pbxt.cc:5121)
==604== by 0x7C4E10: handler::ha_delete_table(char const*) (handler.cc:3367)
==604== by 0x7CAD18: ha_delete_table(THD*, handlerton*, char const*, char const*, char const*, bool) (handler.cc:1987)
==604== by 0x7F2BB8: mysql_rm_table_part2(THD*, TABLE_LIST*, bool, bool, bool, bool) (sql_table.cc:2046)
==604== by 0x7F31CE: mysql_rm_table(THD*, TABLE_LIST*, char, char) (sql_table.cc:1818)
==604== by 0x68D8D3: mysql_execute_command(THD*) (sql_parse.cc:3446)
==604== by 0x692EB3: mysql_parse(THD*, char const*, unsigned, char const**) (sql_parse.cc:6034)
==604== by 0x693CC5: dispatch_command(enum_server_command, THD*, char*, unsigned) (sql_parse.cc:1247)
==604== Address 0xcf8ffee is 33,669,054 bytes inside a block of size 33,701,888 alloc'd
==604== at 0x4C22FAB: malloc (vg_replace_malloc.c:207)
==604== by 0x9FFC7E: xt_malloc(XTThread*, unsigned long) (memory_xt.cc:101)
==604== by 0xA43CA6: xt_ind_init(XTThread*, unsigned long) (cache_xt.cc:632)
==604== by 0x9EBBCC: pbxt_call_init(XTThread*) (ha_pbxt.cc:974)
==604== by 0x9EBF19: pbxt_init(void*) (ha_pbxt.cc:1194)
==604== by 0x7C8CBD: ha_initialize_handlerton(st_plugin_int*) (handler.cc:434)
==604== by 0x88AC96: plugin_initialize(st_plugin_int*) (sql_plugin.cc:1014)
==604== by 0x88E821: plugin_init(int*, char**, int) (sql_plugin.cc:1239)
==604== by 0x67A150: init_server_components() (mysqld.cc:4069)
==604== by 0x67AC29: main (mysqld.cc:4541)
block->cb_data=0xcf8ffe6 block_len=370

I don't know the code well enough to tell if this is a real problem, or just
some unused padding data which does not need to be initialised. In the latter
case, it would be good to add initialisation under #ifdef HAVE_valgrind
similar to Bug 451080.

Related branches

Revision history for this message
Kristian Nielsen (knielsen) wrote :
Download full text (3.7 KiB)

Another reason to eliminate these valgrind errors is that it would make it easier to distinguish the ones that are actual bugs. For example, this one, from testcase pbxt.multi_update, looks like a real problem:

==19278== Invalid write of size 1
==19278== at 0x4C270D8: memset (mc_replace_strmem.c:586)
==19278== by 0xA50298: XTDatabaseLog::xlog_append(XTThread*, unsigned long, unsigned char*, unsigned long, unsigned char*, int, unsigned int*, long*) (xactlog_xt.cc:1112)
==19278== by 0xA519A9: xt_xlog_modify_table(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned long, unsigned char*, XTThread*) (xactlog_xt.cc:1657)
==19278== by 0xA3778A: tab_add_record(XTOpenTable*, XTTabRecInfo*, unsigned int) (table_xt.cc:4017)
==19278== by 0xA38214: xt_tab_new_record(XTOpenTable*, unsigned char*) (table_xt.cc:4380)
==19278== by 0xA0489A: ha_pbxt::write_row(unsigned char*) (ha_pbxt.cc:2645)
==19278== by 0x7D7B29: handler::ha_write_row(unsigned char*) (handler.cc:4647)
==19278== by 0x72ECDC: write_record(THD*, st_table*, st_copy_info*) (sql_insert.cc:1632)
==19278== by 0x733425: select_insert::send_data(List<Item>&) (sql_insert.cc:3212)
==19278== by 0x71AAC2: end_send(JOIN*, st_join_table*, bool) (sql_select.cc:12455)
==19278== by 0x71897D: evaluate_join_record(JOIN*, st_join_table*, int) (sql_select.cc:11638)
==19278== by 0x718616: sub_select(JOIN*, st_join_table*, bool) (sql_select.cc:11518)
==19278== by 0x71812A: do_select(JOIN*, List<Item>*, st_table*, Procedure*) (sql_select.cc:11268)
==19278== by 0x7013BE: JOIN::exec() (sql_select.cc:2292)
==19278== by 0x701B0F: mysql_select(THD*, Item***, TABLE_LIST*, unsigned int, List<Item>&, Item*, unsigned int, st_order*, st_order*, Item*, st_order*, unsigned long long, select_result*, st_select_lex_unit*, st_select_lex*) (sql_select.cc:2486)
==19278== by 0x6F9E07: handle_select(THD*, st_lex*, select_result*, unsigned long) (sql_select.cc:279)
==19278== Address 0xf1c13f0 is 0 bytes after a block of size 1,049,088 alloc'd
==19278== at 0x4C25153: malloc (vg_replace_malloc.c:195)
==19278== by 0xA159D3: xt_malloc(XTThread*, unsigned long) (memory_xt.cc:101)
==19278== by 0xA4F2FB: XTDatabaseLog::xlog_setup(XTThread*, XTDatabase*, long, unsigned long, int) (xactlog_xt.cc:632)
==19278== by 0xA4A5D9: xt_xn_init_db(XTThread*, XTDatabase*) (xaction_xt.cc:1107)
==19278== by 0xA5BF89: xt_get_database(XTThread*, char*, int) (database_xt.cc:471)
==19278== by 0xA5C806: xt_open_database(XTThread*, char*, int) (database_xt.cc:639)
==19278== by 0xA0044D: xt_ha_open_database_of_table(XTThread*, XTPathStr*) (ha_pbxt.cc:512)
==19278== by 0xA09781: ha_pbxt::create(char const*, st_table*, st_ha_create_information*) (ha_pbxt.cc:5438)
==19278== by 0x7D4DB9: handler::ha_create(char const*, st_table*, st_ha_create_information*) (handler.cc:3397)
==19278== by 0x7D554D: ha_create_table(THD*, char const*, char const*, char const*, st_ha_create_information*, bool) (handler.cc:3604)
==19278== by 0x77BA6C: rea_create_table(THD*, char const*, char const*, char const*, st_ha_create_information*, List<Create_field>&, unsigned int, st_key*, h...

Read more...

Changed in pbxt:
assignee: nobody → Vladimir Kolesnikov (vkolesnikov)
status: New → In Progress
Revision history for this message
Vladimir Kolesnikov (vkolesnikov) wrote :

Kristian,

this was not a real bug. Please see the patch. The problem was that fixed character indexes get packed in memory while on index page full unpacked size is reserved.

Changed in pbxt:
status: In Progress → Fix Committed
Revision history for this message
Kristian Nielsen (knielsen) wrote :

Thanks for the fix!

BTW, the second Valgrind stacktrace is caused by a simple off-by-one error in code only used to silence another false alarm from Valgrind:

=== modified file 'storage/pbxt/src/xactlog_xt.cc'
--- storage/pbxt/src/xactlog_xt.cc 2009-11-24 10:55:06 +0000
+++ storage/pbxt/src/xactlog_xt.cc 2010-03-16 09:43:50 +0000
@@ -1109,7 +1109,8 @@ xtBool XTDatabaseLog::xlog_append(XTThre
                        part_size = 512 - part_size;
                        xl_write_buffer[xl_write_buf_pos] = XT_LOG_ENT_END_OF_LOG;
 #ifdef HAVE_valgrind
- memset(xl_write_buffer + xl_write_buf_pos + 1, 0x66, part_size);
+ if (part_size > 1)
+ memset(xl_write_buffer + xl_write_buf_pos + 1, 0x66, part_size - 1);
 #endif
                        if (!xt_pwrite_file(xl_log_file, xl_write_log_offset, xl_write_buf_pos+part_size, xl_write_buffer, &thread->st_statistics.st_xlog, thread))
                                goto write_failed;

Revision history for this message
Vladimir Kolesnikov (vkolesnikov) wrote :

Oops... Thanks for the fix :)
pushed to the attached branch.

To post a comment you must log in.
This report contains Public information  
Everyone can see this information.

Other bug subscribers

Remote bug watches

Bug watches keep track of this bug in other bug trackers.