Cluster hang on lightly concurrent INSERT ... SELECT

Bug #1372938 reported by Philip Stoev
6
This bug affects 1 person
Affects Status Importance Assigned to Milestone
MySQL patches by Codership
Status tracked in 5.6
5.6
Confirmed
Undecided
Unassigned

Bug Description

Running 3 concurrent INSERT ... SELECT statements against 2 nodes caused the cluster to hang.

Stack traces:

Thread 3 (Thread 0x7f65e80e4700 (LWP 43297)):
#0 0x0000003f2e00bcf0 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1 0x00007f65f2842645 in wait (this=0x7f65e80e1680, cond=...) at galerautils/src/gu_lock.hpp:56
#2 galera::Monitor<galera::ReplicatorSMM::CommitOrder>::enter (this=this@entry=0x2e72ba0, obj=...) at galera/src/monitor.hpp:120
#3 0x00007f65f283ccd8 in galera::ReplicatorSMM::pre_commit (this=0x2e71f50, trx=0x7f65b00bd970, meta=<optimized out>) at galera/src/replicator_smm.cpp:764
#4 0x00007f65f284d580 in galera_pre_commit (gh=<optimized out>, conn_id=6, trx_handle=<optimized out>, flags=<optimized out>, meta=0x305bad0) at galera/src/wsrep_provider.cpp:505
#5 0x00000000007bb020 in wsrep_run_wsrep_commit (thd=0x3059a40, hton=<optimized out>, all=<optimized out>) at /home/philips/git/codership-mysql/sql/wsrep_hton.cc:434
#6 0x00000000005c4f0d in ha_prepare_low (thd=0x3059a40, all=false) at /home/philips/git/codership-mysql/sql/handler.cc:2150
#7 0x00000000005c42d3 in ha_commit_trans (thd=thd@entry=0x3059a40, all=all@entry=false, ignore_global_read_lock=ignore_global_read_lock@entry=false) at /home/philips/git/codership-mysql/sql/handler.cc:1457
#8 0x00000000007a2307 in trans_commit_stmt (thd=thd@entry=0x3059a40) at /home/philips/git/codership-mysql/sql/transaction.cc:473
#9 0x0000000000712d9c in mysql_execute_command (thd=thd@entry=0x3059a40) at /home/philips/git/codership-mysql/sql/sql_parse.cc:5444
#10 0x000000000071a478 in mysql_parse (parser_state=0x7f65e80e3680, length=65, rawbuf=0x7f65b0004bd0 "INSERT INTO t1 (f2) SELECT 1 FROM ten AS a1, ten AS a2, ten AS a3", thd=0x3059a40) at /home/philips/git/codership-mysql/sql/sql_parse.cc:6815
#11 wsrep_mysql_parse (thd=thd@entry=0x3059a40, rawbuf=0x7f65b0004bd0 "INSERT INTO t1 (f2) SELECT 1 FROM ten AS a1, ten AS a2, ten AS a3", length=65, parser_state=parser_state@entry=0x7f65e80e3680) at /home/philips/git/codership-mysql/sql/sql_parse.cc:6595
#12 0x000000000071bc7f in dispatch_command (command=command@entry=COM_QUERY, thd=thd@entry=0x3059a40, packet=packet@entry=0x311c2f1 "INSERT INTO t1 (f2) SELECT 1 FROM ten AS a1, ten AS a2, ten AS a3;", packet_length=packet_length@entry=66) at /home/philips/git/codership-mysql/sql/sql_parse.cc:1525
#13 0x000000000071db62 in do_command (thd=0x3059a40) at /home/philips/git/codership-mysql/sql/sql_parse.cc:1114
#14 0x00000000006e1ba5 in do_handle_one_connection (thd_arg=thd_arg@entry=0x3059a40) at /home/philips/git/codership-mysql/sql/sql_connect.cc:1007
#15 0x00000000006e1c09 in handle_one_connection (arg=0x3059a40) at /home/philips/git/codership-mysql/sql/sql_connect.cc:916
#16 0x0000003f2e007f35 in start_thread () from /lib64/libpthread.so.0
#17 0x0000003f2d8f4c3d in clone () from /lib64/libc.so.6

and

Thread 21 (Thread 0x7f65e84f5700 (LWP 43132)):
#0 0x0000003f2e00bcf0 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1 0x0000000000a2acb1 in os_cond_wait (fast_mutex=0x7f65cc02ac70, cond=0x7f65cc02acb0) at /home/philips/git/codership-mysql/storage/innobase/os/os0sync.cc:214
#2 os_event_wait_low (event=0x7f65cc02ac70, reset_sig_count=<optimized out>, reset_sig_count@entry=0) at /home/philips/git/codership-mysql/storage/innobase/os/os0sync.cc:610
#3 0x0000000000a10b62 in lock_wait_suspend_thread (thr=thr@entry=0x7f65cc0282f0) at /home/philips/git/codership-mysql/storage/innobase/lock/lock0wait.cc:344
#4 0x0000000000a67d74 in row_mysql_handle_errors (savept=0x0, savept=0x0, thr=0x7f65cc0282f0, trx=0x7f65cc0289f8, new_err=<synthetic pointer>) at /home/philips/git/codership-mysql/storage/innobase/row/row0mysql.cc:631
#5 row_lock_table_autoinc_for_mysql (prebuilt=0x7f65cc027bf8) at /home/philips/git/codership-mysql/storage/innobase/row/row0mysql.cc:1126
#6 0x00000000009dcb5c in ha_innobase::innobase_lock_autoinc (this=0x7f65cc025c90) at /home/philips/git/codership-mysql/storage/innobase/handler/ha_innodb.cc:6907
#7 0x00000000009e177b in innobase_set_max_autoinc (auto_inc=3, this=0x7f65cc025c90) at /home/philips/git/codership-mysql/storage/innobase/handler/ha_innodb.cc:6958
#8 ha_innobase::write_row (this=0x7f65cc025c90, record=0x7f65cc0277b0 "\375\002") at /home/philips/git/codership-mysql/storage/innobase/handler/ha_innodb.cc:7273
#9 0x00000000005cb3cf in handler::ha_write_row (this=0x7f65cc025c90, buf=0x7f65cc0277b0 "\375\002") at /home/philips/git/codership-mysql/sql/handler.cc:7370
#10 0x00000000008b6655 in Write_rows_log_event::write_row (this=this@entry=0x7f65cc023120, rli=rli@entry=0x7f65cc00baf0, overwrite=<optimized out>) at /home/philips/git/codership-mysql/sql/log_event.cc:12600
#11 0x00000000008b69c5 in Write_rows_log_event::do_exec_row (this=0x7f65cc023120, rli=0x7f65cc00baf0) at /home/philips/git/codership-mysql/sql/log_event.cc:12789
#12 0x00000000008a28d6 in Rows_log_event::do_apply_row (this=this@entry=0x7f65cc023120, rli=rli@entry=0x7f65cc00baf0) at /home/philips/git/codership-mysql/sql/log_event.cc:10189
#13 0x00000000008b3b3f in Rows_log_event::do_apply_event (this=0x7f65cc023120, rli=0x7f65cc00baf0) at /home/philips/git/codership-mysql/sql/log_event.cc:11326
#14 0x00000000008ac9fe in Log_event::apply_event (this=this@entry=0x7f65cc023120, rli=0x7f65cc00baf0) at /home/philips/git/codership-mysql/sql/log_event.cc:3024
#15 0x00000000005bd25c in wsrep_apply_events (buf_len=859, events_buf=0x7f65ea65f94e, thd=0x7f65cc000990) at /home/philips/git/codership-mysql/sql/wsrep_applier.cc:151
#16 wsrep_apply_cb (ctx=0x7f65cc000990, buf=0x7f65ea65f94e, buf_len=9177, flags=<optimized out>, meta=<optimized out>) at /home/philips/git/codership-mysql/sql/wsrep_applier.cc:238
#17 0x00007f65f27fc6b8 in galera::TrxHandle::apply (this=this@entry=0x7f65cc00ee10, recv_ctx=recv_ctx@entry=0x7f65cc000990, apply_cb=apply_cb@entry=0x5bcce0 <wsrep_apply_cb(void*, void const*, unsigned long, unsigned int, wsrep_trx_meta const*)>, meta=...) at galera/src/trx_handle.cpp:304
#18 0x00007f65f283863d in apply_trx_ws (recv_ctx=recv_ctx@entry=0x7f65cc000990, apply_cb=0x5bcce0 <wsrep_apply_cb(void*, void const*, unsigned long, unsigned int, wsrep_trx_meta const*)>, commit_cb=0x5bd6c0 <wsrep_commit_cb(void*, unsigned int, wsrep_trx_meta const*, bool*, bool)>, trx=..., meta=...) at galera/src/replicator_smm.cpp:37
#19 0x00007f65f283a870 in galera::ReplicatorSMM::apply_trx (this=this@entry=0x2e71f50, recv_ctx=recv_ctx@entry=0x7f65cc000990, trx=trx@entry=0x7f65cc00ee10) at galera/src/replicator_smm.cpp:420
#20 0x00007f65f283d65e in galera::ReplicatorSMM::process_trx (this=0x2e71f50, recv_ctx=0x7f65cc000990, trx=0x7f65cc00ee10) at galera/src/replicator_smm.cpp:1212
#21 0x00007f65f281af98 in galera::GcsActionSource::dispatch (this=this@entry=0x2e725a8, recv_ctx=recv_ctx@entry=0x7f65cc000990, act=..., exit_loop=@0x7f65e84f42e0: false) at galera/src/gcs_action_source.cpp:116
#22 0x00007f65f281c70c in galera::GcsActionSource::process (this=0x2e725a8, recv_ctx=0x7f65cc000990, exit_loop=@0x7f65e84f42e0: false) at galera/src/gcs_action_source.cpp:180
#23 0x00007f65f283db7b in galera::ReplicatorSMM::async_recv (this=0x2e71f50, recv_ctx=0x7f65cc000990) at galera/src/replicator_smm.cpp:355
#24 0x00007f65f284c748 in galera_recv (gh=<optimized out>, recv_ctx=<optimized out>) at galera/src/wsrep_provider.cpp:231
#25 0x00000000005bdc10 in wsrep_replication_process (thd=0x7f65cc000990) at /home/philips/git/codership-mysql/sql/wsrep_thd.cc:309
#26 0x00000000005a715b in start_wsrep_THD (arg=0x5bdbc0 <wsrep_replication_process(THD*)>) at /home/philips/git/codership-mysql/sql/mysqld.cc:5366
#27 0x0000003f2e007f35 in start_thread () from /lib64/libpthread.so.0
#28 0x0000003f2d8f4c3d in clone () from /lib64/libc.so.6

summary: - Cluster hang on concurrent INSERT ... SELECT
+ Cluster hang on lightly concurrent INSERT ... SELECT
Revision history for this message
Philip Stoev (philip-stoev-f) wrote :
Revision history for this message
Philip Stoev (philip-stoev-f) wrote :

Test case:

--source include/galera_cluster.inc
--source include/have_innodb.inc

# Create a second connection to node1 so that we can run transactions concurrently
--let $galera_connection_name = node_1a
--let $galera_server_number = 1
--source include/galera_connect.inc

--connection node_1
CREATE TABLE ten (f1 INTEGER);
INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);

CREATE TABLE t1 (f1 INTEGER AUTO_INCREMENT PRIMARY KEY, f2 INTEGER) Engine=InnoDB;
--connection node_2
#SET GLOBAL wsrep_slave_threads = 4;
# Perform casual read in order to make sure the DDL has arrivde on node_2
SELECT COUNT(*) = 0 FROM t1;

--connection node_1
--send INSERT INTO t1 (f2) SELECT 1 FROM ten AS a1, ten AS a2, ten AS a3;

--connection node_1a
--send INSERT INTO t1 (f2) SELECT 1 FROM ten AS a1, ten AS a2, ten AS a3;

--connection node_2
--send INSERT INTO t1 (f2) SELECT 1 FROM ten AS a1, ten AS a2, ten AS a3;

--connection node_1
--reap

--connection node_1a
--reap

--connection node_2
--reap
SELECT COUNT(*) = 30000 FROM t1;
SELECT COUNT(*) = 5 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user';

# SET GLOBAL wsrep_slave_threads = 1;

--connection default
DROP TABLE t1;
DROP TABLE ten;
--disconnect default

Run it in a bash while loop until it hangs. Should happen within

Revision history for this message
Philip Stoev (philip-stoev-f) wrote :

Should happen within 10 runs or so.

To post a comment you must log in.
This report contains Public information  
Everyone can see this information.

Other bug subscribers

Remote bug watches

Bug watches keep track of this bug in other bug trackers.