Following a load with truncate, secondary indexes have an incorrect number of rows.
Affects | Status | Importance | Assigned to | Milestone | |
---|---|---|---|---|---|
Trafodion |
New
|
High
|
Suresh Subbiah |
Bug Description
Target table with 2 secondary indexes. On a cluster with the 20150528_0830 build, following a load-with-truncate the secondary indexes report an incorrect row count:
SQL>load into cdr
select * from hive.hive.ext_cdr where col7 <= 65535;
UTIL_OUTPUT
-------
Task: LOAD Status: Started Object: TRAFODION.
Task: CLEANUP Status: Started Object: TRAFODION.
Task: CLEANUP Status: Ended Object: TRAFODION.
Task: PREPARATION Status: Started Object: TRAFODION.
Rows Processed: 499439
Task: PREPARATION Status: Ended ET: 00:01:46.041
Task: COMPLETION Status: Started Object: TRAFODION.
Task: COMPLETION Status: Ended ET: 00:00:07.676
--- SQL operation complete.
SQL>select count(*) from cdr;
(EXPR)
-------
--- 1 row(s) selected.
DBROOT>select count(*) from table(index_table cdr_col4_idx);
(EXPR)
-------
--- 1 row(s) selected.
DBROOT>select count(*) from table(index_table cdr_col7_idx);
(EXPR)
-------
--- 1 row(s) selected.
SQL>load with truncate table into cdr
select * from hive.hive.ext_cdr where col7 > 65535;
UTIL_OUTPUT
-------
Task: LOAD Status: Started Object: TRAFODION.
Task: PURGE DATA Status: Started Object: TRAFODION.
Task: PURGE DATA Status: Ended Object: TRAFODION.
Task: CLEANUP Status: Started Object: TRAFODION.
Task: CLEANUP Status: Ended Object: TRAFODION.
Task: PREPARATION Status: Started Object: TRAFODION.
Rows Processed: 9500561
Task: PREPARATION Status: Ended ET: 00:27:44.180
Task: COMPLETION Status: Started Object: TRAFODION.
Task: COMPLETION Status: Ended ET: 00:00:05.800
--- SQL operation complete.
SQL>select count(*) from cdr;
(EXPR)
-------
--- 1 row(s) selected.
DBROOT>select count(*) from table(index_table cdr_col4_idx);
(EXPR)
-------
--- 1 row(s) selected.
DBROOT>select count(*) from table(index_table cdr_col7_idx);
(EXPR)
-------
--- 1 row(s) selected.
To reproduce:
1. hdfs dfs -mkdir /bulkload/cdr
2. hdfs dfs -put cdr.tbl.000[1-4] /bulkload/cdr/.
data files are large, contact <email address hidden> for data files or subset.
3. In hive,
drop table ext_cdr;
create table ext_cdr
(
col1 string,
col2 string,
col3 bigint,
col4 bigint,
col5 string,
col6 string,
col7 int,
col8 int,
col9 string,
col10 string
)
row format delimited fields terminated by '|'
location '/bulkload/cdr'
;
4. in trafci,
CREATE TABLE CDR
(
COl1 CHAR(6) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
, COL2 CHAR(2) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
, COL3 LARGEINT NO DEFAULT NOT NULL NOT DROPPABLE
, COL4 LARGEINT DEFAULT NULL
, COL5 CHAR(14) CHARACTER SET ISO88591 COLLATE DEFAULT NOT NULL
, COL6 CHAR(14) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
, COL7 INT DEFAULT NULL
, COL8 INT DEFAULT NULL
, COL9 CHAR(15) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
, COL10 CHAR(16) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
)
STORE BY (COL3, COL5)
DIVISION BY (SUBSTRING(COL5, 1, 6))
SALT USING 5 PARTITIONS ON (COL3)
HBASE_OPTIONS
(
DATA_BLOCK_ENCODING = 'FAST_DIFF',
COMPRESSION = 'SNAPPY');
create index cdr_col4_idx on cdr(col4)
salt like table
HBASE_OPTIONS
(
DATA_BLOCK_ENCODING = 'FAST_DIFF',
COMPRESSION = 'SNAPPY');
create index cdr_col7_idx on cdr(col7)
salt like table
HBASE_OPTIONS
(
DATA_BLOCK_ENCODING = 'FAST_DIFF',
COMPRESSION = 'SNAPPY');
load into cdr select * from hive.hive.ext_cdr where col7 <= 65535; --expect 499439 rows processed
select count(*) from cdr;
select count(*) from table(index_table cdr_col4_idx);
select count(*) from table(index_table cdr_col7_idx);
load with truncate table into cdr select * from hive.hive.ext_cdr where col7 > 65535; --expect 9500561 rows processed
select count(*) from cdr;
select count(*) from table(index_table cdr_col4_idx);
select count(*) from table(index_table cdr_col7_idx);
Changed in trafodion: | |
importance: | Undecided → High |