I have a patch for this bug which does not involve removing go_fast, but
it does involve adding a check loop. I believe this is still faster than
the previous patch, and it was the best I could do with my programming
knowledge.
I checked: this patch actually compiles and fixes the bug. :)
Regards,
- ods15
diff -u dfa.c ~/sources/debian/gawk-3.1.4/
--- dfa.c 2004-10-29 11:58:47.000000000 +0200
+++ /home/ods15/sources/debian/gawk-3.1.4/dfa.c 2004-10-29 12:00:15.000000000 +0200
@@ -2895,6 +2895,10 @@
register unsigned char eol = eolbyte; /* Likewise for eolbyte. */
static int sbit[NOTCHAR]; /* Table for anding with d->success. */
static int sbit_init;
+ static unsigned char * sameas; /* a simple check that the content
+ between begin and end are indeed
+ what they used to be */
+ static int sizesameas;
if (! sbit_init)
{
@@ -2918,14 +2922,31 @@
if (MB_CUR_MAX > 1)
{
int remain_bytes, i;
+
+ if (!sameas) {
+ MALLOC(sameas, unsigned char, end - begin + 2);
+ memset(sameas, 0, sizeof(unsigned char) * (end - begin + 1));
+ sizesameas = end - begin + 1;
+ }
+
buf_begin -= buf_offset;
if (buf_begin <= (unsigned char const *)begin && (unsigned char const *) end <= buf_end) {
+ int yesgood = sizesameas == end - begin + 1;
+ for (i = 0; i < sizesameas && yesgood; i++) {
+ if (sameas[i] != begin[i]) yesgood = 0;
+ }
+ if (yesgood) {
buf_offset = (unsigned char const *)begin - buf_begin;
buf_begin = begin;
buf_end = end;
goto go_fast;
+ }
}
+ REALLOC(sameas, unsigned char, end - begin + 2);
+ for (i = 0; i < end - begin + 1; i++) sameas[i] = begin[i];
+ sizesameas = end - begin + 1;
+
buf_offset = 0;
buf_begin = begin;
buf_end = end;
Message-Id: <E1CNTmv-0006Gi-F6@linux15>
Date: Fri, 29 Oct 2004 12:15:17 +0200
From: Oded Shimon <email address hidden>
To: Debian Bug Tracking System <email address hidden>
Subject: Patch: Odd regexp matching problem if locale's mb_cur_max > 1
Package: gawk
Version: 1:3.1.4-1
Followup-For: Bug #266519
I have a patch for this bug which does not involve removing go_fast, but
it does involve adding a check loop. I believe this is still faster than
the previous patch, and it was the best I could do with my programming
knowledge.
I checked: this patch actually compiles and fixes the bug. :)
Regards,
- ods15
diff -u dfa.c ~/sources/debian/gawk-3.1.4/
--- dfa.c 2004-10-29 11:58:47.000000000 +0200
+++ /home/ods15/sources/debian/gawk-3.1.4/dfa.c 2004-10-29 12:00:15.000000000 +0200
@@ -2895,6 +2895,10 @@
register unsigned char eol = eolbyte; /* Likewise for eolbyte. */
static int sbit[NOTCHAR]; /* Table for anding with d->success. */
static int sbit_init;
+ static unsigned char * sameas; /* a simple check that the content
+ between begin and end are indeed
+ what they used to be */
+ static int sizesameas;
if (! sbit_init)
{
@@ -2918,14 +2922,31 @@
if (MB_CUR_MAX > 1)
{
int remain_bytes, i;
+
+ if (!sameas) {
+ MALLOC(sameas, unsigned char, end - begin + 2);
+ memset(sameas, 0, sizeof(unsigned char) * (end - begin + 1));
+ sizesameas = end - begin + 1;
+ }
+
buf_begin -= buf_offset;
if (buf_begin <= (unsigned char const *)begin && (unsigned char const *) end <= buf_end) {
+ int yesgood = sizesameas == end - begin + 1;
+ for (i = 0; i < sizesameas && yesgood; i++) {
+ if (sameas[i] != begin[i]) yesgood = 0;
+ }
+ if (yesgood) {
buf_offset = (unsigned char const *)begin - buf_begin;
buf_begin = begin;
buf_end = end;
goto go_fast;
+ }
}
+ REALLOC(sameas, unsigned char, end - begin + 2);
+ for (i = 0; i < end - begin + 1; i++) sameas[i] = begin[i];
+ sizesameas = end - begin + 1;
+
buf_offset = 0;
buf_begin = begin;
buf_end = end;
-- System Information:
Debian Release: 3.1
APT prefers unstable
APT policy: (500, 'unstable')
Architecture: i386 (i686)
Kernel: Linux 2.6.6
Locale: LANG=C, LC_CTYPE=C
Versions of packages gawk depends on:
ii libc6 2.3.2.ds1-18 GNU C Library: Shared libraries an
-- no debconf information