MySQL 5.6.16 fixed http://bugs.mysql.com/bug.php?id=70768 by changing the locking of the stat-related dict_table_t members. To eliminate contention on the previous static array of 64 shared latches, each dict_table_t object got its own dynamically created and destroyed latch.
The short-lived private dict_table_t objects, such as those used in ibuf merge, got these dynamic rwlatches too. Each (non-priority) rwlatch creation performs heap memory allocations, acquires rw_lock_list_mutex, and creates two OS events, whose creation in turn acquires os_sync_mutex.
Parts of a Perf report on the preliminary PS-5.6.16 branch.
The above causes a TPS regression in read-write Sysbench (I/O-bound (!)) on the MySQL 5.6.16 merge: about a 7% drop at 512 threads and a 20% drop at 1024 threads.
MySQL 5.6.16 fixed http://bugs.mysql.com/bug.php?id=70768 by changing the locking of the stat-related dict_table_t members. To eliminate contention on the previous static array of 64 shared latches, each dict_table_t object got its own dynamically created and destroyed latch.
The short-lived private dict_table_t objects, such as those used in ibuf merge, got these dynamic rwlatches too. Each (non-priority) rwlatch creation performs heap memory allocations, acquires rw_lock_list_mutex, and creates two OS events, whose creation in turn acquires os_sync_mutex.
Parts of a Perf report on the preliminary PS-5.6.16 branch.
- 14,88% mysqld mysqld [.] ut_delay(unsigned long) wait(void* , bool, char const*, unsigned long) create_ func(rw_ lock_t* , char const*)
dict_mem_ table_create( char const*, unsigned long, unsigned long, unsigned long, unsigned long) entry_from_ ibuf_rec_ func(unsigned char const*, mem_block_info_t*, dict_index_t**) free_func( rw_lock_ t*) - dict_mem_ table_free( dict_table_ t*) get_volume_ buffered_ count_funcPKhPm mPl.clone. 0 or_delete_ for_page( buf_block_ t*, unsigned long, unsigned long, unsigned long, unsigned long) get_volume_ func(unsigned char const*) create_ func(rw_ lock_t* , char const*)
dict_mem_ table_create( char const*, unsigned long, unsigned long, unsigned long, unsigned long) entry_from_ ibuf_rec_ func(unsigned char const*, mem_block_info_t*, dict_index_t**) func(ib_ mutex_t* , char const*)
dict_mem_ table_create( char const*, unsigned long, unsigned long, unsigned long, unsigned long) entry_from_ ibuf_rec_ func(unsigned char const*, mem_block_info_t*, dict_index_t**) reserve_ cell(sync_ array_t* , void*, unsigned long, char const*, unsigned long, unsigned long*) wait(void* , bool, char const*, unsigned long) create_ func(rw_ lock_t* , char const*)
dict_ mem_table_ create( char const*, unsigned long, unsigned long, unsigned long, unsigned long) entry_from_ ibuf_rec_ func(unsigned char const*, mem_block_info_t*, dict_index_t**) free_func( rw_lock_ t*) table_free( dict_table_ t*) get_volume_ buffered_ count_funcPKhPm mPl.clone. 0 or_delete_ for_page( buf_block_ t*, unsigned long, unsigned long, unsigned long, unsigned long) get_volume_ func(unsigned char const*) s_lock_ spin(void* , unsigned long, bool, bool, char const*, unsigned long) mutex_free_ func(pthread_ mutex_t* ) free(os_ event*) free_func( rw_lock_ t*) table_free( dict_table_ t*) get_volume_ buffered_ count_funcPKhPm mPl.clone. 0 or_delete_ for_page( buf_block_ t*, unsigned long, unsigned long, unsigned long, unsigned long) get_volume_ func(unsigned char const*) func(ib_ mutex_t* ) table_free( dict_table_ t*) get_volume_ buffered_ count_funcPKhPm mPl.clone. 0 or_delete_ for_page( buf_block_ t*, unsigned long, unsigned long, unsigned long, unsigned long) get_volume_ func(unsigned char const*) index_free( dict_index_ t*) index_free( dict_index_ t*) or_delete_ for_page( buf_block_ t*, unsigned long, unsigned long, unsigned long, unsigned long) get_volume_ func(unsigned char const*) free(os_ event*) free_func( rw_lock_ t*) table_free( dict_table_ t*) get_volume_ buffered_ count_funcPKhPm mPl.clone. 0 or_delete_ for_page( buf_block_ t*, unsigned long, unsigned long, unsigned long, unsigned long) get_volume_ func(unsigned char const*) func(ib_ mutex_t* ) table_free( dict_table_ t*) get_volume_ buffered_ count_funcPKhPm mPl.clone. 0 or_delete_ for_page( buf_block_ t*, unsigned long, unsigned long, unsigned long, unsigned long) get_volume_ func(unsigned char const*) wait_event( sync_array_ t*, unsigned long) wait(void* , bool, char const*, unsigned long) create_ func(rw_ lock_t* , char const*)
dict_ mem_table_ create( char const*, unsigned long, unsigned long, unsigned long, unsigned long) entry_from_ ibuf_rec_ func(unsigned char const*, mem_block_info_t*, dict_index_t**) free_func( rw_lock_ t*) table_free( dict_table_ t*) get_volume_ buffered_ count_funcPKhPm mPl.clone. 0 or_delete_ for_page( buf_block_ t*, unsigned long, unsigned long, unsigned long, unsigned long) get_volume_ func(unsigned char const*) s_lock_ spin(void* , unsigned long, bool, bool, char const*, unsigned long) mutex_init_ func(pthread_ mutex_t* )
dict_ mem_index_ create( char const*, char const*, unsigned long, unsigned long, unsigned long) entry_from_ ibuf_rec_ func(unsigned char const*, mem_block_info_t*, dict_index_t**) object( ib_mutex_ t*) free_func( rw_lock_ t*) table_free( dict_table_ t*) get_volume_ buffered_ count_funcPKhPm mPl.clone. 0 or_delete_ for_page( buf_block_ t*, unsigned long, unsigned long, unsigned long, unsigned long) get_volume_ func(unsigned char const*) create_ func(rw_ lock_t* , char const*)
dict_mem_ table_create( char const*, unsigned long, unsigned long, unsigned long, unsigned long) entry_from_ ibuf_rec_ func(unsigned char const*, mem_block_info_t*, dict_index_t**) free_cell( sync_array_ t*, unsigned long) wait(void* , bool, char const*, unsigned long) create_ func(rw_ lock_t* , char const*)
dict_ mem_table_ create( char const*, unsigned long, unsigned long, unsigned long, unsigned long) entry_from_ ibuf_rec_ func(unsigned char const*, mem_block_info_t*, dict_index_t**) free_func( rw_lock_ t*) table_free( dict_table_ t*) get_volume_ buffered_ count_funcPKhPm mPl.clone. 0 or_delete_ for_page( buf_block_ t*, unsigned long, unsigned long, unsigned long, unsigned long) get_volume_ func(unsigned char const*) enter_func( ib_mutex_ t*, char const*, unsigned long) mutex_enter_ funcP10ib_ mutex_tPKcm. clone.2 s_lock_ spin(void* , unsigned long, bool, bool, char const*, unsigned long) wait(void* , bool, char const*, unsigned long) wait(void* , bool, char const*, unsigned long) create_ func(rw_ lock_t* , char const*)
dict_ mem_table_ create( char const*, unsigned long, unsigned long, unsigned long, unsigned long) entry_from_ ibuf_rec_ func(unsigned char const*, mem_block_info_t*, dict_index_t**) free_func( rw_lock_ t*) table_free( dict_table_ t*) get_volume_ buffered_ count_funcPKhPm mPl.clone. 0 or_delete_ for_page( buf_block_ t*, unsigned long, unsigned long, unsigned long, unsigned long) get_volume_ func(unsigned char const*)
- ut_delay(unsigned long)
- 90,02% mutex_spin_
- 47,73% rw_lock_
+ ibuf_build_
- 46,52% rw_lock_
+ 92,34% _ZL35ibuf_
+ 6,54% ibuf_merge_
+ 1,11% ibuf_rec_
...
- 6,35% mysqld libpthread-2.12.so [.] pthread_mutex_lock
- pthread_mutex_lock
- 35,31% os_event_create()
- 60,64% rw_lock_
+ ibuf_build_
- 39,36% mutex_create_
+ ibuf_build_
- 19,28% sync_array_
- 99,16% mutex_spin_
- 50,51% rw_lock_
+ ibuf_build_
- 49,26% rw_lock_
- dict_mem_
+ 92,81% _ZL35ibuf_
+ 6,48% ibuf_merge_
+ 0,71% ibuf_rec_
+ 0,82% rw_lock_
- 18,17% os_fast_
- 67,53% os_event_
- 59,21% rw_lock_
- dict_mem_
+ 91,27% _ZL35ibuf_
+ 6,96% ibuf_merge_
+ 1,77% ibuf_rec_
- 40,79% mutex_free_
- dict_mem_
+ 91,62% _ZL35ibuf_
+ 6,85% ibuf_merge_
+ 1,53% ibuf_rec_
- 32,47% dict_mem_
+ 89,70% ibuf_dummy_
+ 8,57% ibuf_merge_
+ 1,73% ibuf_rec_
- 7,98% os_event_
- 53,98% rw_lock_
- dict_mem_
+ 91,23% _ZL35ibuf_
+ 7,07% ibuf_merge_
+ 1,69% ibuf_rec_
- 46,02% mutex_free_
- dict_mem_
+ 91,32% _ZL35ibuf_
+ 6,99% ibuf_merge_
+ 1,69% ibuf_rec_
- 7,17% sync_array_
- 98,83% mutex_spin_
- 50,14% rw_lock_
+ ibuf_build_
- 49,35% rw_lock_
- dict_mem_
+ 92,97% _ZL35ibuf_
+ 6,19% ibuf_merge_
+ 0,84% ibuf_rec_
+ 1,14% rw_lock_
- 6,76% os_fast_
+ ibuf_build_
- 3,89% mutex_signal_
- 81,44% rw_lock_
- dict_mem_
+ 92,90% _ZL35ibuf_
+ 6,37% ibuf_merge_
+ 0,73% ibuf_rec_
- 18,19% rw_lock_
+ ibuf_build_
- 0,76% sync_array_
- 87,03% mutex_spin_
- 50,19% rw_lock_
+ ibuf_build_
- 46,32% rw_lock_
- dict_mem_
+ 93,59% _ZL35ibuf_
+ 5,77% ibuf_merge_
+ 0,64% ibuf_rec_
+ 1,88% pfs_mutex_
+ 0,87% mtr_commit(mtr_t*)
+ 0,53% _ZL20pfs_
+ 12,46% rw_lock_
- 3,68% mysqld mysqld [.] mutex_spin_
- mutex_spin_
- 48,40% rw_lock_
+ ibuf_build_
- 46,88% rw_lock_
- dict_mem_
+ 92,22% _ZL35ibuf_
+ 6,71% ibuf_merge_
+ 1,07% ibuf_rec_
The above causes a TPS regression in read-write Sysbench (I/O-bound (!)) on the MySQL 5.6.16 merge: about a 7% drop at 512 threads and a 20% drop at 1024 threads.