diff -u mlocate-0.26/debian/changelog mlocate-0.26/debian/changelog --- mlocate-0.26/debian/changelog +++ mlocate-0.26/debian/changelog @@ -1,3 +1,18 @@ +mlocate (0.26-2ubuntu2) UNRELEASED; urgency=medium + + * Imported patches from upstream: + - debian/patches/0001-Call-AM_PROG_AR.patch + - debian/patches/0002-Fix-typos-in-roff-directives.patch + - debian/patches/0003-Update-gettext-version-to-silence-autotools-warnings.patch + - debian/patches/0004-Fix-a-typo-in-temporary-umask-value.patch + - debian/patches/0005-Rename-db.h-include-guard-to-be-more-specific.patch + * Added patches to improve matching of string (pending upstream review) + Fixes LP: #1752148: + - debian/patches/0006-locate-add-transliterate-support-using-iconv-to-matc.patch + - debian/patches/0007-locate-add-ignore-spaces-option-to-ignore-word-separ.patch + + -- Marco Trevisan (Treviño) Tue, 27 Feb 2018 19:25:40 +0100 + mlocate (0.26-2ubuntu1) artful; urgency=low * Merge from Debian unstable. Remaining changes: only in patch2: unchanged: --- mlocate-0.26.orig/42.patch +++ mlocate-0.26/42.patch @@ -0,0 +1,1128 @@ +From f96e90ef35f2810b8b56868468d683c8f9290b1f Mon Sep 17 00:00:00 2001 +From: Marco Trevisan (Treviño) +Date: Feb 23 2018 05:02:24 +0000 +Subject: [PATCH 1/11] locate: add --transliterate support using iconv to match accented + + +When enabled all search parameters and paths are transliterated +and used for matching. + +--- + +diff --git a/AUTHORS b/AUTHORS +index ade5106..8457862 100644 +--- a/AUTHORS ++++ b/AUTHORS +@@ -1 +1,2 @@ + Miloslav Trmac ++Marco Trevisan +diff --git a/configure.ac b/configure.ac +index 2c587ad..162f55c 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -39,6 +39,15 @@ gl_INIT + AM_GNU_GETTEXT([external], [need-ngettext]) + AM_GNU_GETTEXT_VERSION([0.18.2]) + ++AC_ARG_ENABLE(iconv, ++ AC_HELP_STRING([--disable-iconv], ++ [disable iconv support]),, ++ enable_iconv=yes) ++ ++if test x$enable_iconv = xyes; then ++ AM_ICONV ++fi ++ + # Checks for header files. 
+ + # Checks for types. +diff --git a/src/locate.c b/src/locate.c +index 87f2c15..f7ca9d6 100644 +--- a/src/locate.c ++++ b/src/locate.c +@@ -22,6 +22,10 @@ Author: Miloslav Trmac */ + #include + #include + #include ++#if HAVE_ICONV ++#include ++#include ++#endif + #include + #include + #include +@@ -60,6 +64,9 @@ static struct string_list conf_dbpath; /* = { 0, }; */ + /* Ignore case when matching patterns */ + static bool conf_ignore_case; /* = false; */ + ++/* Ignore accents when matching patterns */ ++static bool conf_transliterate; /* = false; */ ++ + /* Return only files that match all patterns */ + static bool conf_match_all_patterns; /* = false; */ + +@@ -108,6 +115,11 @@ static bool conf_quiet; /* = false; */ + /* Output only statistics */ + static bool conf_statistics; /* = false; */ + ++#if HAVE_ICONV ++/* Iconv context for transliterate conversion */ ++static iconv_t iconv_context; /* = NULL; */ ++#endif ++ + /* String utilities */ + + /* Convert SRC to upper-case wide string in OBSTACK; +@@ -163,6 +175,34 @@ uppercase_string (struct obstack *obstack, const char *src) + return res; + } + ++#if HAVE_ICONV ++static char * ++transliterate_string (const char *str) ++{ ++ size_t len; ++ size_t retlen; ++ size_t outlen; ++ char *inbuf; ++ char *outptr; ++ char outbuf[PATH_MAX * 2]; ++ ++ inbuf = (char *) str; ++ outptr = outbuf; ++ len = strlen (str); ++ ++ outlen = sizeof (outbuf) - 1; ++ retlen = iconv (iconv_context, &inbuf, &len, &outptr, &outlen); ++ ++ if (retlen == (size_t) -1) ++ { ++ error (0, errno, _("Impossible to transliterate string %s"), str); ++ return NULL; ++ } ++ ++ return strndup (outbuf, sizeof (outbuf) - 1 - outlen); ++} ++#endif ++ + /* Write STRING to stdout, replace unprintable characters with '?' 
*/ + static void + write_quoted (const char *string) +@@ -418,6 +458,7 @@ static int + handle_path (const char *path, int *visible) + { + const char *s, *matching; ++ char *transliterated = NULL; + + /* Statistics */ + if (conf_statistics != false) +@@ -431,8 +472,17 @@ handle_path (const char *path, int *visible) + matching = s + 1; + else + matching = path; ++#if HAVE_ICONV ++ if (conf_transliterate != false) ++ { ++ transliterated = transliterate_string (matching); ++ matching = transliterated; ++ } ++#endif + if (!string_matches_pattern (matching)) + goto done; ++ free(transliterated); ++ transliterated = NULL; + /* Visible? */ + if (*visible == -1) + *visible = check_directory_perms (path) == 0; +@@ -458,6 +508,7 @@ handle_path (const char *path, int *visible) + if (conf_output_limit_set != false && matches_found == conf_output_limit) + return -1; + done: ++ free(transliterated); + return 0; + } + +@@ -632,6 +683,10 @@ help (void) + " -h, --help print this help\n" + " -i, --ignore-case ignore case distinctions when matching " + "patterns\n" ++#if HAVE_ICONV ++ " -t, --transliterate ignore accents using iconv " ++ "transliteration when matching patterns\n" ++#endif + " -l, --limit, -n LIMIT limit output (or counting) to LIMIT " + "entries\n" + " -m, --mmap ignored, for backward compatibility\n" +@@ -669,6 +724,7 @@ parse_options (int argc, char *argv[]) + { "follow", no_argument, NULL, 'L' }, + { "help", no_argument, NULL, 'h' }, + { "ignore-case", no_argument, NULL, 'i' }, ++ { "transliterate", no_argument, NULL, 't' }, + { "limit", required_argument, NULL, 'l' }, + { "mmap", no_argument, NULL, 'm' }, + { "quiet", no_argument, NULL, 'q' }, +@@ -691,7 +747,7 @@ parse_options (int argc, char *argv[]) + { + int opt, idx; + +- opt = getopt_long (argc, argv, "0AHPLSVbcd:ehil:mn:qr:sw", options, &idx); ++ opt = getopt_long (argc, argv, "0AHPLSVbcd:ehitl:mn:qr:sw", options, &idx); + switch (opt) + { + case -1: +@@ -772,6 +828,10 @@ parse_options (int argc, char 
*argv[]) + conf_ignore_case = true; + break; + ++ case 't': ++ conf_transliterate = true; ++ break; ++ + case 'l': case 'n': + { + char *end; +@@ -822,6 +882,19 @@ parse_options (int argc, char *argv[]) + error (EXIT_FAILURE, 0, + _("non-option arguments are not allowed with --%s"), + conf_statistics != false ? "statistics" : "regexp"); ++ if (conf_transliterate != false) ++ { ++#if HAVE_ICONV ++ iconv_context = iconv_open ("ASCII//TRANSLIT", nl_langinfo (CODESET)); ++ if (iconv_context == (iconv_t) -1) ++ error (EXIT_FAILURE, errno, _("can not do transliteration between " \ ++ "these locales: `%s' and `ASCII'"), ++ nl_langinfo (CODESET)); ++#else ++ error (EXIT_FAILURE, errno, _("transliteration support is not supported" \ ++ "by this build of %s"), program_name); ++#endif ++ } + } + + /* Parse arguments in ARGC, ARGV. Exit on error. */ +@@ -836,6 +909,13 @@ parse_arguments (int argc, char *argv[]) + error (EXIT_FAILURE, 0, _("no pattern to search for specified")); + conf_patterns.entries = xnrealloc (conf_patterns.entries, conf_patterns.len, + sizeof (*conf_patterns.entries)); ++#if HAVE_ICONV ++ if (conf_transliterate != false) ++ { ++ for (i = 0; i < conf_patterns.len; i++) ++ conf_patterns.entries[i] = transliterate_string (conf_patterns.entries[i]); ++ } ++#endif + if (conf_match_regexp != false) + { + int cflags; +@@ -1042,6 +1122,10 @@ main (int argc, char *argv[]) + handle_dbpath_entry (conf_dbpath.entries[i]); + } + done: ++#if HAVE_ICONV ++ if (conf_transliterate != false && iconv_context) ++ iconv_close (iconv_context); ++#endif + if (conf_output_count != false) + printf ("%ju\n", matches_found); + if (conf_statistics != false || matches_found != 0) + +From 11a579c1ebbd88d61790ea16698bcf338706cbfe Mon Sep 17 00:00:00 2001 +From: Marco Trevisan (Treviño) +Date: Feb 23 2018 05:02:33 +0000 +Subject: [PATCH 2/11] doc: add -t / --transliterate to man page + + +--- + +diff --git a/doc/locate.1.in b/doc/locate.1.in +index b8d2826..9c2e629 100644 +--- 
a/doc/locate.1.in ++++ b/doc/locate.1.in +@@ -126,6 +126,10 @@ and exit successfully. + Ignore case distinctions when matching patterns. + + .TP ++\fB\-t\fR, \fB\-\-transliterate\fR ++Ignore accents using iconv transliteration when matching patterns. ++ ++.TP + \fB\-l\fR, \fB\-\-limit\fR, \fB\-n\fR \fILIMIT\fR + Exit successfully after finding + .I LIMIT +@@ -267,4 +271,5 @@ but it is added to other databases in this implementation and \fBslocate\fR. + Miloslav Trmac + + .SH SEE ALSO +-.BR updatedb (8) ++.BR updatedb (8), ++.BR iconv (1), + +From c84e4a786e07f518710aed135349df82b11313fa Mon Sep 17 00:00:00 2001 +From: Marco Trevisan (Treviño) +Date: Feb 23 2018 05:06:57 +0000 +Subject: [PATCH 3/11] locate: only add and check transliterated patterns if needed + + +There's no need to check transliterated strings all the times if the +targets don't need transliteration. + +--- + +diff --git a/src/locate.c b/src/locate.c +index f7ca9d6..ecd26bf 100644 +--- a/src/locate.c ++++ b/src/locate.c +@@ -199,6 +199,9 @@ transliterate_string (const char *str) + return NULL; + } + ++ if (retlen == 0) ++ return NULL; ++ + return strndup (outbuf, sizeof (outbuf) - 1 - outlen); + } + #endif +@@ -458,7 +461,6 @@ static int + handle_path (const char *path, int *visible) + { + const char *s, *matching; +- char *transliterated = NULL; + + /* Statistics */ + if (conf_statistics != false) +@@ -472,17 +474,29 @@ handle_path (const char *path, int *visible) + matching = s + 1; + else + matching = path; +-#if HAVE_ICONV +- if (conf_transliterate != false) ++ if (!string_matches_pattern (matching)) ++#if !HAVE_ICONV ++ goto done; ++#else + { +- transliterated = transliterate_string (matching); +- matching = transliterated; ++ bool matched; ++ ++ matched = false; ++ if (conf_transliterate != false) ++ { ++ char *transliterated; ++ ++ transliterated = transliterate_string (matching); ++ if (transliterated) ++ { ++ matched = string_matches_pattern (transliterated); ++ free (transliterated); ++ } 
++ } ++ if (!matched) ++ goto done; + } + #endif +- if (!string_matches_pattern (matching)) +- goto done; +- free(transliterated); +- transliterated = NULL; + /* Visible? */ + if (*visible == -1) + *visible = check_directory_perms (path) == 0; +@@ -508,7 +522,6 @@ handle_path (const char *path, int *visible) + if (conf_output_limit_set != false && matches_found == conf_output_limit) + return -1; + done: +- free(transliterated); + return 0; + } + +@@ -907,15 +920,23 @@ parse_arguments (int argc, char *argv[]) + string_list_append (&conf_patterns, argv[i]); + if (conf_statistics == false && conf_patterns.len == 0) + error (EXIT_FAILURE, 0, _("no pattern to search for specified")); +- conf_patterns.entries = xnrealloc (conf_patterns.entries, conf_patterns.len, +- sizeof (*conf_patterns.entries)); + #if HAVE_ICONV + if (conf_transliterate != false) + { +- for (i = 0; i < conf_patterns.len; i++) +- conf_patterns.entries[i] = transliterate_string (conf_patterns.entries[i]); ++ size_t patterns_len = conf_patterns.len; ++ char *transliterated; ++ ++ for (i = 0; i < patterns_len; i++) ++ { ++ transliterated = transliterate_string (conf_patterns.entries[i]); ++ ++ if (transliterated) ++ string_list_append (&conf_patterns, transliterated); ++ } + } + #endif ++ conf_patterns.entries = xnrealloc (conf_patterns.entries, conf_patterns.len, ++ sizeof (*conf_patterns.entries)); + if (conf_match_regexp != false) + { + int cflags; + +From 20454fab283dbfc37ed245d0739656db0f549bc5 Mon Sep 17 00:00:00 2001 +From: Marco Trevisan (Treviño) +Date: Feb 23 2018 05:07:05 +0000 +Subject: [PATCH 4/11] locate: ignore transliterated strings only with replacement chars + + +--- + +diff --git a/src/locate.c b/src/locate.c +index ecd26bf..0775773 100644 +--- a/src/locate.c ++++ b/src/locate.c +@@ -180,8 +180,10 @@ static char * + transliterate_string (const char *str) + { + size_t len; +- size_t retlen; ++ size_t conversions; + size_t outlen; ++ size_t transliterated_len; ++ size_t i; + char *inbuf; 
+ char *outptr; + char outbuf[PATH_MAX * 2]; +@@ -191,18 +193,27 @@ transliterate_string (const char *str) + len = strlen (str); + + outlen = sizeof (outbuf) - 1; +- retlen = iconv (iconv_context, &inbuf, &len, &outptr, &outlen); ++ conversions = iconv (iconv_context, &inbuf, &len, &outptr, &outlen); ++ transliterated_len = sizeof (outbuf) - 1 - outlen; + +- if (retlen == (size_t) -1) ++ if (conversions == (size_t) -1) + { + error (0, errno, _("Impossible to transliterate string %s"), str); + return NULL; + } + +- if (retlen == 0) +- return NULL; ++ if (transliterated_len == conversions) ++ { ++ bool found_valid = false; ++ ++ for (i = 0; i < transliterated_len && !found_valid; ++i) ++ found_valid = outbuf[i] != '?'; ++ ++ if (!found_valid) ++ return NULL; ++ } + +- return strndup (outbuf, sizeof (outbuf) - 1 - outlen); ++ return strndup (outbuf, transliterated_len); + } + #endif + + +From 89d267c6a25c06535b8431e40fc9b28ca5ec025e Mon Sep 17 00:00:00 2001 +From: Marco Trevisan (Treviño) +Date: Feb 23 2018 05:07:05 +0000 +Subject: [PATCH 5/11] locate: transliterate strings by char and do it on valid results only + + +To avoid false positives and not to break regex (in advanced mode +when a '?' is present, we had to escape it otherwise). 
+ +--- + +diff --git a/src/locate.c b/src/locate.c +index 0775773..29f046e 100644 +--- a/src/locate.c ++++ b/src/locate.c +@@ -179,41 +179,63 @@ uppercase_string (struct obstack *obstack, const char *src) + static char * + transliterate_string (const char *str) + { +- size_t len; +- size_t conversions; +- size_t outlen; +- size_t transliterated_len; +- size_t i; ++ size_t strrlen; ++ size_t inlen; ++ size_t outleft; ++ size_t transliteratedlen; ++ bool changed; ++ char outbuf[PATH_MAX * 2]; + char *inbuf; + char *outptr; +- char outbuf[PATH_MAX * 2]; + ++ changed = false; + inbuf = (char *) str; ++ inlen = 1; + outptr = outbuf; +- len = strlen (str); ++ strrlen = strlen (str); ++ outleft = sizeof (outbuf) - 1; ++ transliteratedlen = 0; + +- outlen = sizeof (outbuf) - 1; +- conversions = iconv (iconv_context, &inbuf, &len, &outptr, &outlen); +- transliterated_len = sizeof (outbuf) - 1 - outlen; +- +- if (conversions == (size_t) -1) ++ while (inbuf < str + strrlen) + { +- error (0, errno, _("Impossible to transliterate string %s"), str); +- return NULL; +- } ++ size_t convertedlen; ++ size_t conversions; ++ size_t symbollen; + +- if (transliterated_len == conversions) +- { +- bool found_valid = false; ++ symbollen = inlen; ++ conversions = iconv (iconv_context, &inbuf, &inlen, &outptr, &outleft); ++ convertedlen = (outptr - outbuf) - transliteratedlen; + +- for (i = 0; i < transliterated_len && !found_valid; ++i) +- found_valid = outbuf[i] != '?'; ++ if (conversions == (size_t) -1) ++ { ++ if (errno == EILSEQ || errno == EINVAL) ++ { ++ inlen += 1; ++ continue; ++ } + +- if (!found_valid) +- return NULL; ++ error (0, errno, _("Impossible to transliterate string %s"), str); ++ return NULL; ++ } ++ else if (conversions == 1 && convertedlen == 1 && outptr[-1] == '?') ++ { ++ /* Transliteration is not possible for this symbol, so we just ++ reuse it as it is. 
*/ ++ memcpy (outptr - 1, inbuf - symbollen, symbollen); ++ convertedlen = symbollen; ++ outptr += symbollen - 1; ++ outleft -= symbollen - 1; ++ } ++ else if (conversions > 0) ++ changed = true; ++ transliteratedlen += convertedlen; ++ inlen = 1; + } + +- return strndup (outbuf, transliterated_len); ++ if (changed != true) ++ return NULL; ++ ++ return strndup (outbuf, transliteratedlen); + } + #endif + + +From a6c0fb80d2f4be47a880123d0ad630581469da0e Mon Sep 17 00:00:00 2001 +From: Marco Trevisan (Treviño) +Date: Feb 23 2018 05:07:05 +0000 +Subject: [PATCH 6/11] locate: don't even try to transliterate ascii strings + + +This improves performances a lot, basically there's now +almost no timing difference between using -t parameter or not + +--- + +diff --git a/src/locate.c b/src/locate.c +index 29f046e..cd0e897 100644 +--- a/src/locate.c ++++ b/src/locate.c +@@ -183,19 +183,33 @@ transliterate_string (const char *str) + size_t inlen; + size_t outleft; + size_t transliteratedlen; ++ size_t i; + bool changed; ++ bool needsconversion; + char outbuf[PATH_MAX * 2]; + char *inbuf; + char *outptr; + ++ needsconversion = false; + changed = false; + inbuf = (char *) str; + inlen = 1; + outptr = outbuf; +- strrlen = strlen (str); ++ strrlen = 0; + outleft = sizeof (outbuf) - 1; + transliteratedlen = 0; + ++ for (i = 0; str[i]; i++) ++ { ++ if (str[i] & 0x80) ++ needsconversion = true; ++ ++ ++strrlen; ++ } ++ ++ if (needsconversion != true) ++ return NULL; ++ + while (inbuf < str + strrlen) + { + size_t convertedlen; + +From 9c1c52fea3182117c6c9b5372439abc3830d709e Mon Sep 17 00:00:00 2001 +From: Marco Trevisan (Treviño) +Date: Feb 23 2018 13:58:04 +0000 +Subject: [PATCH 7/11] locate: take in account inlen value when transliterating string + + +We don't want to pass to iconv a buffer with a size that might +overflow the actual string length. 
+ +--- + +diff --git a/src/locate.c b/src/locate.c +index cd0e897..5442a88 100644 +--- a/src/locate.c ++++ b/src/locate.c +@@ -210,7 +210,7 @@ transliterate_string (const char *str) + if (needsconversion != true) + return NULL; + +- while (inbuf < str + strrlen) ++ while (inbuf + inlen <= str + strrlen) + { + size_t convertedlen; + size_t conversions; + +From 15d7fdfd1a25ca08518e2e656e4a7844feb378cd Mon Sep 17 00:00:00 2001 +From: Marco Trevisan (Treviño) +Date: Feb 23 2018 15:00:01 +0000 +Subject: [PATCH 8/11] locate: only allocate memory if needed in transliteration + + +We can be sure that the transliterated string will be +long at least like the input source plus an extra computed +on the number of chars that might be converted, as there's +no transliteration that takes so much space. + +--- + +diff --git a/src/locate.c b/src/locate.c +index 5442a88..54f7c14 100644 +--- a/src/locate.c ++++ b/src/locate.c +@@ -183,33 +183,35 @@ transliterate_string (const char *str) + size_t inlen; + size_t outleft; + size_t transliteratedlen; ++ size_t nonasciibytes; + size_t i; + bool changed; +- bool needsconversion; +- char outbuf[PATH_MAX * 2]; + char *inbuf; ++ char *outbuf; + char *outptr; + +- needsconversion = false; + changed = false; +- inbuf = (char *) str; +- inlen = 1; +- outptr = outbuf; ++ nonasciibytes = 0; + strrlen = 0; +- outleft = sizeof (outbuf) - 1; +- transliteratedlen = 0; + + for (i = 0; str[i]; i++) + { + if (str[i] & 0x80) +- needsconversion = true; ++ ++nonasciibytes; + + ++strrlen; + } + +- if (needsconversion != true) ++ if (nonasciibytes < 1) + return NULL; + ++ inbuf = (char *) str; ++ inlen = 1; ++ transliteratedlen = 0; ++ outleft = strrlen + (nonasciibytes * 5); ++ outbuf = xmalloc (outleft); ++ outptr = outbuf; ++ + while (inbuf + inlen <= str + strrlen) + { + size_t convertedlen; +@@ -227,9 +229,9 @@ transliterate_string (const char *str) + inlen += 1; + continue; + } +- + error (0, errno, _("Impossible to transliterate string %s"), str); 
+- return NULL; ++ changed = false; ++ break; + } + else if (conversions == 1 && convertedlen == 1 && outptr[-1] == '?') + { +@@ -241,15 +243,21 @@ transliterate_string (const char *str) + outleft -= symbollen - 1; + } + else if (conversions > 0) +- changed = true; ++ { ++ changed = true; ++ } + transliteratedlen += convertedlen; + inlen = 1; + } + + if (changed != true) +- return NULL; ++ { ++ free (outbuf); ++ return NULL; ++ } + +- return strndup (outbuf, transliteratedlen); ++ outbuf[transliteratedlen] = '\0'; ++ return outbuf; + } + #endif + + +From 5fc996c39f9b1ed0c9bada1b9e60b0a7ee623803 Mon Sep 17 00:00:00 2001 +From: Marco Trevisan (Treviño) +Date: Feb 23 2018 22:31:44 +0000 +Subject: [PATCH 9/11] locate: allocate less space for transliterated, realloc if needed + + +In case we don't have enough space for the transliterated string +we can just reallocate some space for it, so far this is just +needed when chars such as `㎯' are found as they use 4 bytes, but +the transliteration is 7 bytes. As we can't make assumptions +about what will be the maximum transliteration length, we can +just repeatedly reallocate if needed. 
+ +--- + +diff --git a/src/locate.c b/src/locate.c +index 54f7c14..de7fcd6 100644 +--- a/src/locate.c ++++ b/src/locate.c +@@ -197,7 +197,7 @@ transliterate_string (const char *str) + for (i = 0; str[i]; i++) + { + if (str[i] & 0x80) +- ++nonasciibytes; ++ ++nonasciibytes; + + ++strrlen; + } +@@ -208,7 +208,7 @@ transliterate_string (const char *str) + inbuf = (char *) str; + inlen = 1; + transliteratedlen = 0; +- outleft = strrlen + (nonasciibytes * 5); ++ outleft = strrlen + nonasciibytes; + outbuf = xmalloc (outleft); + outptr = outbuf; + +@@ -217,10 +217,12 @@ transliterate_string (const char *str) + size_t convertedlen; + size_t conversions; + size_t symbollen; ++ size_t outidx; + + symbollen = inlen; + conversions = iconv (iconv_context, &inbuf, &inlen, &outptr, &outleft); +- convertedlen = (outptr - outbuf) - transliteratedlen; ++ outidx = outptr - outbuf; ++ convertedlen = outidx - transliteratedlen; + + if (conversions == (size_t) -1) + { +@@ -229,6 +231,13 @@ transliterate_string (const char *str) + inlen += 1; + continue; + } ++ else if (errno == E2BIG) ++ { ++ outleft += 5; ++ outbuf = xrealloc (outbuf, outidx + outleft); ++ outptr = outbuf + outidx; ++ continue; ++ } + error (0, errno, _("Impossible to transliterate string %s"), str); + changed = false; + break; + +From 1359fb07f35c1d0e08b12070da0e3aa8faa0065a Mon Sep 17 00:00:00 2001 +From: Marco Trevisan (Treviño) +Date: Feb 23 2018 22:39:05 +0000 +Subject: [PATCH 10/11] locate: escape transliterated meta chars if regex is enabled + + +As per C-translit.h many chars could generate combinations +where regex meta chars are used. +For example: ⓪, ﬩,{ → ㎯, ﹙∗ +In such case, we need to escape the regex properly. 
+ +--- + +diff --git a/src/locate.c b/src/locate.c +index de7fcd6..df18a0d 100644 +--- a/src/locate.c ++++ b/src/locate.c +@@ -51,6 +51,9 @@ Author: Miloslav Trmac */ + #include "db.h" + #include "lib.h" + ++#define BASIC_REGEX_META_CHARS ".^$*[]\\-" ++#define EXTENDED_REGEX_META_CHARS BASIC_REGEX_META_CHARS "{}|+?()" ++ + /* Check file existence before reporting them */ + static bool conf_check_existence; /* = false; */ + +@@ -176,6 +179,69 @@ uppercase_string (struct obstack *obstack, const char *src) + } + + #if HAVE_ICONV ++static bool ++char_needs_escape (const char c) ++{ ++ if (conf_match_regexp_basic != false && ++ strchr (BASIC_REGEX_META_CHARS, c) != NULL) ++ return true; ++ ++ if (conf_match_regexp_basic != true && ++ strchr (EXTENDED_REGEX_META_CHARS, c) != NULL) ++ return true; ++ ++ return false; ++} ++ ++static char * ++escape_regex (const char *str, size_t len, size_t *escaped_len) ++{ ++ size_t i, j; ++ size_t newlen; ++ bool foundmeta; ++ char *outbuf; ++ ++ if (escaped_len) ++ *escaped_len = 0; ++ ++ if (conf_match_regexp != true) ++ return NULL; ++ ++ foundmeta = false; ++ newlen = 0; ++ ++ for (i = 0; str[i] && i < len; ++i) ++ { ++ if (char_needs_escape (str[i])) ++ { ++ foundmeta = true; ++ ++newlen; ++ } ++ ++newlen; ++ } ++ ++ if (foundmeta != true || newlen == 0) ++ return NULL; ++ ++ outbuf = xmalloc (newlen + 1); ++ outbuf[newlen] = '\0'; ++ ++ for (i = 0, j = 0; i < len && j < newlen; ++i) ++ { ++ if (char_needs_escape (str[i])) ++ outbuf[j++] = '\\'; ++ outbuf[j++] = str[i]; ++ } ++ ++ if (escaped_len) ++ *escaped_len = newlen; ++ ++ return outbuf; ++} ++ ++/* Use iconv to transliterate the string into ASCII chars, when possible. ++ If a transliteration does not exist, we just use the actual symbol ++ not to loose precision. 
*/ + static char * + transliterate_string (const char *str) + { +@@ -253,6 +319,32 @@ transliterate_string (const char *str) + } + else if (conversions > 0) + { ++ if (conf_match_regexp != false && convertedlen > 0) ++ { ++ char *converted; ++ char *escaped; ++ size_t escaped_len; ++ ++ converted = outptr - convertedlen; ++ escaped = escape_regex (converted, convertedlen, &escaped_len); ++ ++ if (escaped) ++ { ++ if (escaped_len > outleft) ++ { ++ outleft += (escaped_len - outleft); ++ outbuf = xrealloc (outbuf, outidx + outleft); ++ outptr = outbuf + outidx; ++ converted = outptr - convertedlen; ++ } ++ memcpy (converted, escaped, escaped_len); ++ free (escaped); ++ ++ outptr += (escaped_len - convertedlen); ++ outleft -= (escaped_len - convertedlen); ++ convertedlen = escaped_len; ++ } ++ } + changed = true; + } + transliteratedlen += convertedlen; + +From eae18a30fd302177f3cc12a2f353f7793b3e7a4e Mon Sep 17 00:00:00 2001 +From: Marco Trevisan (Treviño) +Date: Feb 23 2018 22:52:18 +0000 +Subject: [PATCH 11/11] locate: add --ignore-spaces option to ignore word separators + + +It allows to locate in files ignoring in-words separators, +such as punctuation and spaces, so searching "foo-bar" is +like searching for "foo bar", "foo_bar" or "foo?bar". + +This is not compatible with regex. + +--- + +diff --git a/doc/locate.1.in b/doc/locate.1.in +index 9c2e629..307a2b2 100644 +--- a/doc/locate.1.in ++++ b/doc/locate.1.in +@@ -126,6 +126,10 @@ and exit successfully. + Ignore case distinctions when matching patterns. + + .TP ++\fB\-p\fR, \fB\-\-ignore\-spaces\fR ++Ignore punctuation and spaces when matching patterns. ++ ++.TP + \fB\-t\fR, \fB\-\-transliterate\fR + Ignore accents using iconv transliteration when matching patterns. 
+ +diff --git a/src/locate.c b/src/locate.c +index df18a0d..9d8691c 100644 +--- a/src/locate.c ++++ b/src/locate.c +@@ -70,6 +70,9 @@ static bool conf_ignore_case; /* = false; */ + /* Ignore accents when matching patterns */ + static bool conf_transliterate; /* = false; */ + ++/* Ignore puncts and spaces when matching patterns */ ++static bool conf_ingore_separators; /* = false; */ ++ + /* Return only files that match all patterns */ + static bool conf_match_all_patterns; /* = false; */ + +@@ -362,6 +365,57 @@ transliterate_string (const char *str) + } + #endif + ++/* Remove repeated punct or spaces from string and replaces ++ them using a space*/ ++static char * ++compress_string_separators (const char *str) ++{ ++ size_t strippedlen; ++ size_t i; ++ char *outbuf; ++ bool first; ++ bool changed; ++ ++ changed = false; ++ first = false; ++ strippedlen = 0; ++ outbuf = xmalloc (strlen (str) + 1); ++ ++ for (i = 0; str[i]; i++) ++ { ++ char cnt; ++ ++ cnt = str[i]; ++ if (isspace (cnt) || (ispunct (cnt) && cnt != '@')) ++ { ++ if (first != false) ++ { ++ changed = true; ++ continue; ++ } ++ if (cnt != ' ') ++ { ++ cnt = ' '; ++ changed = true; ++ } ++ first = true; ++ } ++ else ++ first = false; ++ ++ outbuf[strippedlen++] = cnt; ++ } ++ ++ if (!changed) ++ { ++ free (outbuf); ++ return NULL; ++ } ++ ++ outbuf[strippedlen] = '\0'; ++ return outbuf; ++} ++ + /* Write STRING to stdout, replace unprintable characters with '?' 
*/ + static void + write_quoted (const char *string) +@@ -631,28 +685,32 @@ handle_path (const char *path, int *visible) + else + matching = path; + if (!string_matches_pattern (matching)) +-#if !HAVE_ICONV +- goto done; +-#else + { ++ char *altered_matching; + bool matched; + ++ altered_matching = NULL; + matched = false; ++ if (conf_ingore_separators != false) ++ altered_matching = compress_string_separators (matching); ++#if HAVE_ICONV + if (conf_transliterate != false) + { +- char *transliterated; +- +- transliterated = transliterate_string (matching); +- if (transliterated) +- { +- matched = string_matches_pattern (transliterated); +- free (transliterated); +- } ++ char *old_altered = altered_matching; ++ if (altered_matching) ++ matching = altered_matching; ++ altered_matching = transliterate_string (matching); ++ free (old_altered); ++ } ++#endif ++ if (altered_matching != NULL) ++ { ++ matched = string_matches_pattern (altered_matching); ++ free (altered_matching); + } + if (!matched) + goto done; + } +-#endif + /* Visible? 
*/ + if (*visible == -1) + *visible = check_directory_perms (path) == 0; +@@ -856,6 +914,8 @@ help (void) + " -t, --transliterate ignore accents using iconv " + "transliteration when matching patterns\n" + #endif ++ " -p, --ignore-spaces ignore punctuation and spaces when " ++ "matching patterns\n" + " -l, --limit, -n LIMIT limit output (or counting) to LIMIT " + "entries\n" + " -m, --mmap ignored, for backward compatibility\n" +@@ -916,7 +976,7 @@ parse_options (int argc, char *argv[]) + { + int opt, idx; + +- opt = getopt_long (argc, argv, "0AHPLSVbcd:ehitl:mn:qr:sw", options, &idx); ++ opt = getopt_long (argc, argv, "0AHPLSVbcd:ehitpl:mn:qr:sw", options, &idx); + switch (opt) + { + case -1: +@@ -1001,6 +1061,10 @@ parse_options (int argc, char *argv[]) + conf_transliterate = true; + break; + ++ case 'p': ++ conf_ingore_separators = true; ++ break; ++ + case 'l': case 'n': + { + char *end; +@@ -1051,6 +1115,9 @@ parse_options (int argc, char *argv[]) + error (EXIT_FAILURE, 0, + _("non-option arguments are not allowed with --%s"), + conf_statistics != false ? 
"statistics" : "regexp"); ++ if (conf_ingore_separators != false && conf_match_regexp != false) ++ error (EXIT_FAILURE, 0, ++ _("ignore-spaces is not supported when using regexp")); + if (conf_transliterate != false) + { + #if HAVE_ICONV +@@ -1076,21 +1143,30 @@ parse_arguments (int argc, char *argv[]) + string_list_append (&conf_patterns, argv[i]); + if (conf_statistics == false && conf_patterns.len == 0) + error (EXIT_FAILURE, 0, _("no pattern to search for specified")); +-#if HAVE_ICONV +- if (conf_transliterate != false) ++ if (conf_transliterate != false || conf_ingore_separators != false) + { ++ char *altered_pattern; + size_t patterns_len = conf_patterns.len; +- char *transliterated; + + for (i = 0; i < patterns_len; i++) + { +- transliterated = transliterate_string (conf_patterns.entries[i]); +- +- if (transliterated) +- string_list_append (&conf_patterns, transliterated); ++ if (conf_ingore_separators != false) ++ { ++ altered_pattern = ++ compress_string_separators (conf_patterns.entries[i]); ++ if (altered_pattern) ++ conf_patterns.entries[i] = altered_pattern; ++ } ++#if HAVE_ICONV ++ if (conf_transliterate != false) ++ { ++ altered_pattern = transliterate_string (conf_patterns.entries[i]); ++ if (altered_pattern) ++ string_list_append (&conf_patterns, altered_pattern); ++ } ++#endif + } + } +-#endif + conf_patterns.entries = xnrealloc (conf_patterns.entries, conf_patterns.len, + sizeof (*conf_patterns.entries)); + if (conf_match_regexp != false) + only in patch2: unchanged: --- mlocate-0.26.orig/debian/patches/0001-Call-AM_PROG_AR.patch +++ mlocate-0.26/debian/patches/0001-Call-AM_PROG_AR.patch @@ -0,0 +1,33 @@ +From: =?utf-8?q?Miloslav_Trma=C4=8D?= +Date: Wed, 24 Apr 2013 22:25:37 +0200 +Subject: Call AM_PROG_AR + +--- + ChangeLog | 4 ++++ + configure.ac | 1 + + 2 files changed, 5 insertions(+) + +diff --git a/ChangeLog b/ChangeLog +index 2be635e..497a24f 100644 +--- a/ChangeLog ++++ b/ChangeLog +@@ -1,3 +1,7 @@ ++2013-04-24 Miloslav Trmač ++ ++ * 
configure.ac: Call AM_PROG_AR as automake wants us to. ++ + 2012-09-22 Miloslav Trmač + + * configure.ac: Relase 0.26. +diff --git a/configure.ac b/configure.ac +index f11bac6..04de37e 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -28,6 +28,7 @@ AM_INIT_AUTOMAKE([no-dist-gzip dist-xz no-define subdir-objects -Wall]) + AC_PROG_CC + gl_EARLY + AM_PROG_CC_C_O ++AM_PROG_AR + AC_PROG_RANLIB + # This interface seems to be undocumented, but bison uses it ;-) + AM_MISSING_PROG([AUTOM4TE], [autom4te]) only in patch2: unchanged: --- mlocate-0.26.orig/debian/patches/0002-Fix-typos-in-roff-directives.patch +++ mlocate-0.26/debian/patches/0002-Fix-typos-in-roff-directives.patch @@ -0,0 +1,51 @@ +From: =?utf-8?q?Miloslav_Trma=C4=8D?= +Date: Tue, 18 Jun 2013 23:24:23 +0200 +Subject: Fix typos in roff directives + +Patch by Eric S. Raymond . +--- + ChangeLog | 6 ++++++ + doc/mlocate.db.5 | 2 +- + doc/updatedb.conf.5.in | 2 +- + 3 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/ChangeLog b/ChangeLog +index 497a24f..e210a27 100644 +--- a/ChangeLog ++++ b/ChangeLog +@@ -1,3 +1,9 @@ ++2013-06-18 Miloslav Trmač ++ ++ * doc/mlocate.db.5 ++ * doc/updatedb.conf.5.in: Fix typos in roff directives. Patch by ++ Eric S. Raymond . ++ + 2013-04-24 Miloslav Trmač + + * configure.ac: Call AM_PROG_AR as automake wants us to. +diff --git a/doc/mlocate.db.5 b/doc/mlocate.db.5 +index 60814d6..a1b95e5 100644 +--- a/doc/mlocate.db.5 ++++ b/doc/mlocate.db.5 +@@ -53,7 +53,7 @@ function. + Currently defined variables are: + .TP + \fBprune_bind_mounts\fR +-A single entry, the value of \fbPRUNE_BIND_MOUNTS\fR; one of the strings ++A single entry, the value of \fBPRUNE_BIND_MOUNTS\fR; one of the strings + .B 0 + or \fB1\fR. + +diff --git a/doc/updatedb.conf.5.in b/doc/updatedb.conf.5.in +index 9b433a2..a8b5942 100644 +--- a/doc/updatedb.conf.5.in ++++ b/doc/updatedb.conf.5.in +@@ -87,7 +87,7 @@ in which the directory would be reported by + By default, no paths are skipped. 
+ + .TP +-\fBPRUNE_BIND_MOUNTS\FR ++\fBPRUNE_BIND_MOUNTS\fR + One of the strings \fB0\fR, \fBno\fR, \fB1\fR or \fByes\fR. + If + .B PRUNE_BIND_MOUNTS only in patch2: unchanged: --- mlocate-0.26.orig/debian/patches/0003-Update-gettext-version-to-silence-autotools-warnings.patch +++ mlocate-0.26/debian/patches/0003-Update-gettext-version-to-silence-autotools-warnings.patch @@ -0,0 +1,35 @@ +From: =?utf-8?q?Miloslav_Trma=C4=8D?= +Date: Mon, 8 Jul 2013 22:34:48 +0200 +Subject: Update gettext version to silence autotools warnings + +--- + ChangeLog | 5 +++++ + configure.ac | 2 +- + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/ChangeLog b/ChangeLog +index e210a27..f570540 100644 +--- a/ChangeLog ++++ b/ChangeLog +@@ -1,3 +1,8 @@ ++2013-07-08 Miloslav Trmač ++ ++ * configure.ac: Update gettext version, which silences some autotools ++ warnings. ++ + 2013-06-18 Miloslav Trmač + + * doc/mlocate.db.5 +diff --git a/configure.ac b/configure.ac +index 04de37e..af1be52 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -37,7 +37,7 @@ gl_INIT + + # Checks for libraries. + AM_GNU_GETTEXT([external], [need-ngettext]) +-AM_GNU_GETTEXT_VERSION([0.17]) ++AM_GNU_GETTEXT_VERSION([0.18.2]) + + # Checks for header files. + only in patch2: unchanged: --- mlocate-0.26.orig/debian/patches/0004-Fix-a-typo-in-temporary-umask-value.patch +++ mlocate-0.26/debian/patches/0004-Fix-a-typo-in-temporary-umask-value.patch @@ -0,0 +1,36 @@ +From: =?utf-8?q?Miloslav_Trma=C4=8D?= +Date: Thu, 5 Dec 2013 00:18:24 +0100 +Subject: Fix a typo in temporary umask value + +Reported in https://fedorahosted.org/mlocate/ticket/27 . +--- + ChangeLog | 5 +++++ + src/updatedb.c | 2 +- + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/ChangeLog b/ChangeLog +index f570540..9bd074b 100644 +--- a/ChangeLog ++++ b/ChangeLog +@@ -1,3 +1,8 @@ ++2013-12-05 Miloslav Trmač ++ ++ * src/updatedb.c (new_db_setup_permissions): Fix a typo in the temporary ++ umask value. 
++ + 2013-07-08 Miloslav Trmač + + * configure.ac: Update gettext version, which silences some autotools +diff --git a/src/updatedb.c b/src/updatedb.c +index 7cfa22d..1a177be 100644 +--- a/src/updatedb.c ++++ b/src/updatedb.c +@@ -957,7 +957,7 @@ new_db_setup_permissions (void) + { + mode_t mask; + +- mask = umask (S_IRWXU | S_IRWXG | S_IRWXG); ++ mask = umask (S_IRWXU | S_IRWXG | S_IRWXO); + umask (mask); + mode = ((S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH) + & ~mask); only in patch2: unchanged: --- mlocate-0.26.orig/debian/patches/0005-Rename-db.h-include-guard-to-be-more-specific.patch +++ mlocate-0.26/debian/patches/0005-Rename-db.h-include-guard-to-be-more-specific.patch @@ -0,0 +1,39 @@ +From: =?utf-8?q?Miloslav_Trma=C4=8D?= +Date: Mon, 14 Nov 2016 18:06:11 +0100 +Subject: Rename db.h include guard to be more specific + +https://fedorahosted.org/mlocate/ticket/37 +--- + ChangeLog | 6 ++++++ + src/db.h | 4 ++-- + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/ChangeLog b/ChangeLog +index 9bd074b..ca4b2bb 100644 +--- a/ChangeLog ++++ b/ChangeLog +@@ -1,3 +1,9 @@ ++2016-11-14 Miloslav Trmač ++ ++ * src/db.h (DB_H__): Rename to... ++ (MLOCATE_DB_H__): ... to minimize risk of conflicts when the header file ++ is installed to /usr/include. ++ + 2013-12-05 Miloslav Trmač + + * src/updatedb.c (new_db_setup_permissions): Fix a typo in the temporary +diff --git a/src/db.h b/src/db.h +index d3a3eff..39f609d 100644 +--- a/src/db.h ++++ b/src/db.h +@@ -13,8 +13,8 @@ PARTICULAR PURPOSE. 
+ + Author: Miloslav Trmac */ + +-#ifndef DB_H__ +-#define DB_H__ ++#ifndef MLOCATE_DB_H__ ++#define MLOCATE_DB_H__ + + #include + only in patch2: unchanged: --- mlocate-0.26.orig/debian/patches/0006-locate-add-transliterate-support-using-iconv-to-matc.patch +++ mlocate-0.26/debian/patches/0006-locate-add-transliterate-support-using-iconv-to-matc.patch @@ -0,0 +1,455 @@ +From ecbf0283545f9955018a4e986c811983971ddd7c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= +Date: Fri, 23 Feb 2018 23:37:48 +0100 +Subject: [PATCH] locate: add --transliterate support using iconv to match + accented +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Forwarded: https://pagure.io/mlocate/pull-request/41 +Bug: https://pagure.io/mlocate/issue/40 +Bug-Ubuntu: https://bugs.launchpad.net/ubuntu/+source/mlocate/+bug/1752148 + +When enabled, all search parameters and paths are transliterated +and used for matching. + +There's no need to check transliterated strings all the time if the +targets don't need transliteration. + +To avoid false positives and not to break regex (in advanced mode +when a '?' is present, we had to escape it otherwise). + +We can be sure that the transliterated string will be +at least as long as the input source plus an extra computed +from the number of chars that might be converted, as there's +no transliteration that takes so much space. + +In case we don't have enough space for the transliterated string +we can just reallocate some space for it; so far this is only +needed when chars such as `㎯' are found, as they use 4 bytes but +the transliteration is 7 bytes. As we can't make assumptions +about what the maximum transliteration length will be, we can +just repeatedly reallocate if needed. + +As per C-translit.h many chars could generate combinations +where regex meta chars are used. +For example: ⓪, ﬩,{ → ㎯, ﹙∗ +In such cases, we need to escape the regex properly.
+--- + AUTHORS | 1 + + configure.ac | 9 ++ + doc/locate.1.in | 7 +- + src/locate.c | 263 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- + 4 files changed, 278 insertions(+), 2 deletions(-) + +diff --git a/AUTHORS b/AUTHORS +index ade5106..8457862 100644 +--- a/AUTHORS ++++ b/AUTHORS +@@ -1 +1,2 @@ + Miloslav Trmac ++Marco Trevisan +diff --git a/configure.ac b/configure.ac +index 2c587ad..162f55c 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -39,6 +39,15 @@ gl_INIT + AM_GNU_GETTEXT([external], [need-ngettext]) + AM_GNU_GETTEXT_VERSION([0.18.2]) + ++AC_ARG_ENABLE(iconv, ++ AC_HELP_STRING([--disable-iconv], ++ [disable iconv support]),, ++ enable_iconv=yes) ++ ++if test x$enable_iconv = xyes; then ++ AM_ICONV ++fi ++ + # Checks for header files. + + # Checks for types. +diff --git a/doc/locate.1.in b/doc/locate.1.in +index b8d2826..9c2e629 100644 +--- a/doc/locate.1.in ++++ b/doc/locate.1.in +@@ -126,6 +126,10 @@ and exit successfully. + Ignore case distinctions when matching patterns. + + .TP ++\fB\-t\fR, \fB\-\-transliterate\fR ++Ignore accents using iconv transliteration when matching patterns. ++ ++.TP + \fB\-l\fR, \fB\-\-limit\fR, \fB\-n\fR \fILIMIT\fR + Exit successfully after finding + .I LIMIT +@@ -267,4 +271,5 @@ but it is added to other databases in this implementation and \fBslocate\fR. 
+ Miloslav Trmac + + .SH SEE ALSO +-.BR updatedb (8) ++.BR updatedb (8), ++.BR iconv (1), +diff --git a/src/locate.c b/src/locate.c +index 87f2c15..df18a0d 100644 +--- a/src/locate.c ++++ b/src/locate.c +@@ -22,6 +22,10 @@ Author: Miloslav Trmac */ + #include + #include + #include ++#if HAVE_ICONV ++#include ++#include ++#endif + #include + #include + #include +@@ -47,6 +51,9 @@ Author: Miloslav Trmac */ + #include "db.h" + #include "lib.h" + ++#define BASIC_REGEX_META_CHARS ".^$*[]\\-" ++#define EXTENDED_REGEX_META_CHARS BASIC_REGEX_META_CHARS "{}|+?()" ++ + /* Check file existence before reporting them */ + static bool conf_check_existence; /* = false; */ + +@@ -60,6 +67,9 @@ static struct string_list conf_dbpath; /* = { 0, }; */ + /* Ignore case when matching patterns */ + static bool conf_ignore_case; /* = false; */ + ++/* Ignore accents when matching patterns */ ++static bool conf_transliterate; /* = false; */ ++ + /* Return only files that match all patterns */ + static bool conf_match_all_patterns; /* = false; */ + +@@ -108,6 +118,11 @@ static bool conf_quiet; /* = false; */ + /* Output only statistics */ + static bool conf_statistics; /* = false; */ + ++#if HAVE_ICONV ++/* Iconv context for transliterate conversion */ ++static iconv_t iconv_context; /* = NULL; */ ++#endif ++ + /* String utilities */ + + /* Convert SRC to upper-case wide string in OBSTACK; +@@ -163,6 +178,190 @@ uppercase_string (struct obstack *obstack, const char *src) + return res; + } + ++#if HAVE_ICONV ++static bool ++char_needs_escape (const char c) ++{ ++ if (conf_match_regexp_basic != false && ++ strchr (BASIC_REGEX_META_CHARS, c) != NULL) ++ return true; ++ ++ if (conf_match_regexp_basic != true && ++ strchr (EXTENDED_REGEX_META_CHARS, c) != NULL) ++ return true; ++ ++ return false; ++} ++ ++static char * ++escape_regex (const char *str, size_t len, size_t *escaped_len) ++{ ++ size_t i, j; ++ size_t newlen; ++ bool foundmeta; ++ char *outbuf; ++ ++ if (escaped_len) ++ *escaped_len = 
0; ++ ++ if (conf_match_regexp != true) ++ return NULL; ++ ++ foundmeta = false; ++ newlen = 0; ++ ++ for (i = 0; str[i] && i < len; ++i) ++ { ++ if (char_needs_escape (str[i])) ++ { ++ foundmeta = true; ++ ++newlen; ++ } ++ ++newlen; ++ } ++ ++ if (foundmeta != true || newlen == 0) ++ return NULL; ++ ++ outbuf = xmalloc (newlen + 1); ++ outbuf[newlen] = '\0'; ++ ++ for (i = 0, j = 0; i < len && j < newlen; ++i) ++ { ++ if (char_needs_escape (str[i])) ++ outbuf[j++] = '\\'; ++ outbuf[j++] = str[i]; ++ } ++ ++ if (escaped_len) ++ *escaped_len = newlen; ++ ++ return outbuf; ++} ++ ++/* Use iconv to transliterate the string into ASCII chars, when possible. ++ If a transliteration does not exist, we just use the actual symbol ++ not to loose precision. */ ++static char * ++transliterate_string (const char *str) ++{ ++ size_t strrlen; ++ size_t inlen; ++ size_t outleft; ++ size_t transliteratedlen; ++ size_t nonasciibytes; ++ size_t i; ++ bool changed; ++ char *inbuf; ++ char *outbuf; ++ char *outptr; ++ ++ changed = false; ++ nonasciibytes = 0; ++ strrlen = 0; ++ ++ for (i = 0; str[i]; i++) ++ { ++ if (str[i] & 0x80) ++ ++nonasciibytes; ++ ++ ++strrlen; ++ } ++ ++ if (nonasciibytes < 1) ++ return NULL; ++ ++ inbuf = (char *) str; ++ inlen = 1; ++ transliteratedlen = 0; ++ outleft = strrlen + nonasciibytes; ++ outbuf = xmalloc (outleft); ++ outptr = outbuf; ++ ++ while (inbuf + inlen <= str + strrlen) ++ { ++ size_t convertedlen; ++ size_t conversions; ++ size_t symbollen; ++ size_t outidx; ++ ++ symbollen = inlen; ++ conversions = iconv (iconv_context, &inbuf, &inlen, &outptr, &outleft); ++ outidx = outptr - outbuf; ++ convertedlen = outidx - transliteratedlen; ++ ++ if (conversions == (size_t) -1) ++ { ++ if (errno == EILSEQ || errno == EINVAL) ++ { ++ inlen += 1; ++ continue; ++ } ++ else if (errno == E2BIG) ++ { ++ outleft += 5; ++ outbuf = xrealloc (outbuf, outidx + outleft); ++ outptr = outbuf + outidx; ++ continue; ++ } ++ error (0, errno, _("Impossible to 
transliterate string %s"), str); ++ changed = false; ++ break; ++ } ++ else if (conversions == 1 && convertedlen == 1 && outptr[-1] == '?') ++ { ++ /* Transliteration is not possible for this symbol, so we just ++ reuse it as it is. */ ++ memcpy (outptr - 1, inbuf - symbollen, symbollen); ++ convertedlen = symbollen; ++ outptr += symbollen - 1; ++ outleft -= symbollen - 1; ++ } ++ else if (conversions > 0) ++ { ++ if (conf_match_regexp != false && convertedlen > 0) ++ { ++ char *converted; ++ char *escaped; ++ size_t escaped_len; ++ ++ converted = outptr - convertedlen; ++ escaped = escape_regex (converted, convertedlen, &escaped_len); ++ ++ if (escaped) ++ { ++ if (escaped_len > outleft) ++ { ++ outleft += (escaped_len - outleft); ++ outbuf = xrealloc (outbuf, outidx + outleft); ++ outptr = outbuf + outidx; ++ converted = outptr - convertedlen; ++ } ++ memcpy (converted, escaped, escaped_len); ++ free (escaped); ++ ++ outptr += (escaped_len - convertedlen); ++ outleft -= (escaped_len - convertedlen); ++ convertedlen = escaped_len; ++ } ++ } ++ changed = true; ++ } ++ transliteratedlen += convertedlen; ++ inlen = 1; ++ } ++ ++ if (changed != true) ++ { ++ free (outbuf); ++ return NULL; ++ } ++ ++ outbuf[transliteratedlen] = '\0'; ++ return outbuf; ++} ++#endif ++ + /* Write STRING to stdout, replace unprintable characters with '?' */ + static void + write_quoted (const char *string) +@@ -432,7 +631,28 @@ handle_path (const char *path, int *visible) + else + matching = path; + if (!string_matches_pattern (matching)) ++#if !HAVE_ICONV + goto done; ++#else ++ { ++ bool matched; ++ ++ matched = false; ++ if (conf_transliterate != false) ++ { ++ char *transliterated; ++ ++ transliterated = transliterate_string (matching); ++ if (transliterated) ++ { ++ matched = string_matches_pattern (transliterated); ++ free (transliterated); ++ } ++ } ++ if (!matched) ++ goto done; ++ } ++#endif + /* Visible? 
*/ + if (*visible == -1) + *visible = check_directory_perms (path) == 0; +@@ -632,6 +852,10 @@ help (void) + " -h, --help print this help\n" + " -i, --ignore-case ignore case distinctions when matching " + "patterns\n" ++#if HAVE_ICONV ++ " -t, --transliterate ignore accents using iconv " ++ "transliteration when matching patterns\n" ++#endif + " -l, --limit, -n LIMIT limit output (or counting) to LIMIT " + "entries\n" + " -m, --mmap ignored, for backward compatibility\n" +@@ -669,6 +893,7 @@ parse_options (int argc, char *argv[]) + { "follow", no_argument, NULL, 'L' }, + { "help", no_argument, NULL, 'h' }, + { "ignore-case", no_argument, NULL, 'i' }, ++ { "transliterate", no_argument, NULL, 't' }, + { "limit", required_argument, NULL, 'l' }, + { "mmap", no_argument, NULL, 'm' }, + { "quiet", no_argument, NULL, 'q' }, +@@ -691,7 +916,7 @@ parse_options (int argc, char *argv[]) + { + int opt, idx; + +- opt = getopt_long (argc, argv, "0AHPLSVbcd:ehil:mn:qr:sw", options, &idx); ++ opt = getopt_long (argc, argv, "0AHPLSVbcd:ehitl:mn:qr:sw", options, &idx); + switch (opt) + { + case -1: +@@ -772,6 +997,10 @@ parse_options (int argc, char *argv[]) + conf_ignore_case = true; + break; + ++ case 't': ++ conf_transliterate = true; ++ break; ++ + case 'l': case 'n': + { + char *end; +@@ -822,6 +1051,19 @@ parse_options (int argc, char *argv[]) + error (EXIT_FAILURE, 0, + _("non-option arguments are not allowed with --%s"), + conf_statistics != false ? "statistics" : "regexp"); ++ if (conf_transliterate != false) ++ { ++#if HAVE_ICONV ++ iconv_context = iconv_open ("ASCII//TRANSLIT", nl_langinfo (CODESET)); ++ if (iconv_context == (iconv_t) -1) ++ error (EXIT_FAILURE, errno, _("can not do transliteration between " \ ++ "these locales: `%s' and `ASCII'"), ++ nl_langinfo (CODESET)); ++#else ++ error (EXIT_FAILURE, errno, _("transliteration support is not supported" \ ++ "by this build of %s"), program_name); ++#endif ++ } + } + + /* Parse arguments in ARGC, ARGV. Exit on error. 
*/ +@@ -834,6 +1076,21 @@ parse_arguments (int argc, char *argv[]) + string_list_append (&conf_patterns, argv[i]); + if (conf_statistics == false && conf_patterns.len == 0) + error (EXIT_FAILURE, 0, _("no pattern to search for specified")); ++#if HAVE_ICONV ++ if (conf_transliterate != false) ++ { ++ size_t patterns_len = conf_patterns.len; ++ char *transliterated; ++ ++ for (i = 0; i < patterns_len; i++) ++ { ++ transliterated = transliterate_string (conf_patterns.entries[i]); ++ ++ if (transliterated) ++ string_list_append (&conf_patterns, transliterated); ++ } ++ } ++#endif + conf_patterns.entries = xnrealloc (conf_patterns.entries, conf_patterns.len, + sizeof (*conf_patterns.entries)); + if (conf_match_regexp != false) +@@ -1042,6 +1299,10 @@ main (int argc, char *argv[]) + handle_dbpath_entry (conf_dbpath.entries[i]); + } + done: ++#if HAVE_ICONV ++ if (conf_transliterate != false && iconv_context) ++ iconv_close (iconv_context); ++#endif + if (conf_output_count != false) + printf ("%ju\n", matches_found); + if (conf_statistics != false || matches_found != 0) +-- +2.7.4 + only in patch2: unchanged: --- mlocate-0.26.orig/debian/patches/0007-locate-add-ignore-spaces-option-to-ignore-word-separ.patch +++ mlocate-0.26/debian/patches/0007-locate-add-ignore-spaces-option-to-ignore-word-separ.patch @@ -0,0 +1,244 @@ +From: =?utf-8?b?Ik1hcmNvIFRyZXZpc2FuIChUcmV2acOxbyki?= +Date: Fri, 23 Feb 2018 18:28:51 +0100 +Subject: locate: add --ignore-spaces option to ignore word separators +Forwarded: https://pagure.io/mlocate/pull-request/42 +Bug-Ubuntu: https://bugs.launchpad.net/ubuntu/+source/mlocate/+bug/1752148 + +It allows locating files while ignoring in-word separators, +such as punctuation and spaces, so searching for "foo-bar" is +like searching for "foo bar", "foo_bar" or "foo?bar". + +This is not compatible with regex.
+--- + doc/locate.1.in | 4 ++ + src/locate.c | 122 +++++++++++++++++++++++++++++++++++++++++++++----------- + 2 files changed, 103 insertions(+), 23 deletions(-) + +diff --git a/doc/locate.1.in b/doc/locate.1.in +index 9c2e629..307a2b2 100644 +--- a/doc/locate.1.in ++++ b/doc/locate.1.in +@@ -126,6 +126,10 @@ and exit successfully. + Ignore case distinctions when matching patterns. + + .TP ++\fB\-p\fR, \fB\-\-ignore\-spaces\fR ++Ignore punctuation and spaces when matching patterns. ++ ++.TP + \fB\-t\fR, \fB\-\-transliterate\fR + Ignore accents using iconv transliteration when matching patterns. + +diff --git a/src/locate.c b/src/locate.c +index df18a0d..d4a6e9d 100644 +--- a/src/locate.c ++++ b/src/locate.c +@@ -70,6 +70,9 @@ static bool conf_ignore_case; /* = false; */ + /* Ignore accents when matching patterns */ + static bool conf_transliterate; /* = false; */ + ++/* Ignore puncts and spaces when matching patterns */ ++static bool conf_ingore_separators; /* = false; */ ++ + /* Return only files that match all patterns */ + static bool conf_match_all_patterns; /* = false; */ + +@@ -329,7 +332,7 @@ transliterate_string (const char *str) + escaped = escape_regex (converted, convertedlen, &escaped_len); + + if (escaped) +- { ++ { + if (escaped_len > outleft) + { + outleft += (escaped_len - outleft); +@@ -343,7 +346,7 @@ transliterate_string (const char *str) + outptr += (escaped_len - convertedlen); + outleft -= (escaped_len - convertedlen); + convertedlen = escaped_len; +- } ++ } + } + changed = true; + } +@@ -362,6 +365,57 @@ transliterate_string (const char *str) + } + #endif + ++/* Remove repeated punct or spaces from string and replaces ++ them using a space*/ ++static char * ++compress_string_separators (const char *str) ++{ ++ size_t strippedlen; ++ size_t i; ++ char *outbuf; ++ bool first; ++ bool changed; ++ ++ changed = false; ++ first = false; ++ strippedlen = 0; ++ outbuf = xmalloc (strlen (str) + 1); ++ ++ for (i = 0; str[i]; i++) ++ { ++ char cnt; ++ 
++ cnt = str[i]; ++ if (isspace (cnt) || (ispunct (cnt) && cnt != '@')) ++ { ++ if (first != false) ++ { ++ changed = true; ++ continue; ++ } ++ if (cnt != ' ') ++ { ++ cnt = ' '; ++ changed = true; ++ } ++ first = true; ++ } ++ else ++ first = false; ++ ++ outbuf[strippedlen++] = cnt; ++ } ++ ++ if (!changed) ++ { ++ free (outbuf); ++ return NULL; ++ } ++ ++ outbuf[strippedlen] = '\0'; ++ return outbuf; ++} ++ + /* Write STRING to stdout, replace unprintable characters with '?' */ + static void + write_quoted (const char *string) +@@ -631,28 +685,32 @@ handle_path (const char *path, int *visible) + else + matching = path; + if (!string_matches_pattern (matching)) +-#if !HAVE_ICONV +- goto done; +-#else + { ++ char *altered_matching; + bool matched; + ++ altered_matching = NULL; + matched = false; ++ if (conf_ingore_separators != false) ++ altered_matching = compress_string_separators (matching); ++#if HAVE_ICONV + if (conf_transliterate != false) + { +- char *transliterated; +- +- transliterated = transliterate_string (matching); +- if (transliterated) +- { +- matched = string_matches_pattern (transliterated); +- free (transliterated); +- } ++ char *old_altered = altered_matching; ++ if (altered_matching) ++ matching = altered_matching; ++ altered_matching = transliterate_string (matching); ++ free (old_altered); ++ } ++#endif ++ if (altered_matching != NULL) ++ { ++ matched = string_matches_pattern (altered_matching); ++ free (altered_matching); + } + if (!matched) + goto done; + } +-#endif + /* Visible? 
*/ + if (*visible == -1) + *visible = check_directory_perms (path) == 0; +@@ -856,6 +914,8 @@ help (void) + " -t, --transliterate ignore accents using iconv " + "transliteration when matching patterns\n" + #endif ++ " -p, --ignore-spaces ignore punctuation and spaces when " ++ "matching patterns\n" + " -l, --limit, -n LIMIT limit output (or counting) to LIMIT " + "entries\n" + " -m, --mmap ignored, for backward compatibility\n" +@@ -916,7 +976,7 @@ parse_options (int argc, char *argv[]) + { + int opt, idx; + +- opt = getopt_long (argc, argv, "0AHPLSVbcd:ehitl:mn:qr:sw", options, &idx); ++ opt = getopt_long (argc, argv, "0AHPLSVbcd:ehitpl:mn:qr:sw", options, &idx); + switch (opt) + { + case -1: +@@ -1001,6 +1061,10 @@ parse_options (int argc, char *argv[]) + conf_transliterate = true; + break; + ++ case 'p': ++ conf_ingore_separators = true; ++ break; ++ + case 'l': case 'n': + { + char *end; +@@ -1051,6 +1115,9 @@ parse_options (int argc, char *argv[]) + error (EXIT_FAILURE, 0, + _("non-option arguments are not allowed with --%s"), + conf_statistics != false ? 
"statistics" : "regexp"); ++ if (conf_ingore_separators != false && conf_match_regexp != false) ++ error (EXIT_FAILURE, 0, ++ _("ignore-spaces is not supported when using regexp")); + if (conf_transliterate != false) + { + #if HAVE_ICONV +@@ -1076,21 +1143,30 @@ parse_arguments (int argc, char *argv[]) + string_list_append (&conf_patterns, argv[i]); + if (conf_statistics == false && conf_patterns.len == 0) + error (EXIT_FAILURE, 0, _("no pattern to search for specified")); +-#if HAVE_ICONV +- if (conf_transliterate != false) ++ if (conf_transliterate != false || conf_ingore_separators != false) + { ++ char *altered_pattern; + size_t patterns_len = conf_patterns.len; +- char *transliterated; + + for (i = 0; i < patterns_len; i++) + { +- transliterated = transliterate_string (conf_patterns.entries[i]); +- +- if (transliterated) +- string_list_append (&conf_patterns, transliterated); ++ if (conf_ingore_separators != false) ++ { ++ altered_pattern = ++ compress_string_separators (conf_patterns.entries[i]); ++ if (altered_pattern) ++ conf_patterns.entries[i] = altered_pattern; ++ } ++#if HAVE_ICONV ++ if (conf_transliterate != false) ++ { ++ altered_pattern = transliterate_string (conf_patterns.entries[i]); ++ if (altered_pattern) ++ string_list_append (&conf_patterns, altered_pattern); ++ } ++#endif + } + } +-#endif + conf_patterns.entries = xnrealloc (conf_patterns.entries, conf_patterns.len, + sizeof (*conf_patterns.entries)); + if (conf_match_regexp != false) only in patch2: unchanged: --- mlocate-0.26.orig/debian/patches/series +++ mlocate-0.26/debian/patches/series @@ -0,0 +1,7 @@ +0001-Call-AM_PROG_AR.patch +0002-Fix-typos-in-roff-directives.patch +0003-Update-gettext-version-to-silence-autotools-warnings.patch +0004-Fix-a-typo-in-temporary-umask-value.patch +0005-Rename-db.h-include-guard-to-be-more-specific.patch +0006-locate-add-transliterate-support-using-iconv-to-matc.patch +0007-locate-add-ignore-spaces-option-to-ignore-word-separ.patch only in patch2: 
unchanged: --- mlocate-0.26.orig/locate-add-ignore-spaces-option.patch +++ mlocate-0.26/locate-add-ignore-spaces-option.patch @@ -0,0 +1,246 @@ +From bbb772237df06fb9f4da5c0d91796ac91fbf691d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= +Date: Fri, 23 Feb 2018 18:28:51 +0100 +Subject: [PATCH] locate: add --ignore-spaces option to ignore word separators + +It allows locating files while ignoring in-word separators, +such as punctuation and spaces, so searching for "foo-bar" is +like searching for "foo bar", "foo_bar" or "foo?bar". + +This is not compatible with regex. +--- + doc/locate.1.in | 4 ++ + src/locate.c | 122 +++++++++++++++++++++++++++++++++++++++++++++----------- + 2 files changed, 103 insertions(+), 23 deletions(-) + +diff --git a/doc/locate.1.in b/doc/locate.1.in +index 9c2e629..307a2b2 100644 +--- a/doc/locate.1.in ++++ b/doc/locate.1.in +@@ -126,6 +126,10 @@ and exit successfully. + Ignore case distinctions when matching patterns. + + .TP ++\fB\-p\fR, \fB\-\-ignore\-spaces\fR ++Ignore punctuation and spaces when matching patterns. ++ ++.TP + \fB\-t\fR, \fB\-\-transliterate\fR + Ignore accents using iconv transliteration when matching patterns.
+ +diff --git a/src/locate.c b/src/locate.c +index df18a0d..d4a6e9d 100644 +--- a/src/locate.c ++++ b/src/locate.c +@@ -70,6 +70,9 @@ static bool conf_ignore_case; /* = false; */ + /* Ignore accents when matching patterns */ + static bool conf_transliterate; /* = false; */ + ++/* Ignore puncts and spaces when matching patterns */ ++static bool conf_ingore_separators; /* = false; */ ++ + /* Return only files that match all patterns */ + static bool conf_match_all_patterns; /* = false; */ + +@@ -329,7 +332,7 @@ transliterate_string (const char *str) + escaped = escape_regex (converted, convertedlen, &escaped_len); + + if (escaped) +- { ++ { + if (escaped_len > outleft) + { + outleft += (escaped_len - outleft); +@@ -343,7 +346,7 @@ transliterate_string (const char *str) + outptr += (escaped_len - convertedlen); + outleft -= (escaped_len - convertedlen); + convertedlen = escaped_len; +- } ++ } + } + changed = true; + } +@@ -362,6 +365,57 @@ transliterate_string (const char *str) + } + #endif + ++/* Remove repeated punct or spaces from string and replaces ++ them using a space*/ ++static char * ++compress_string_separators (const char *str) ++{ ++ size_t strippedlen; ++ size_t i; ++ char *outbuf; ++ bool first; ++ bool changed; ++ ++ changed = false; ++ first = false; ++ strippedlen = 0; ++ outbuf = xmalloc (strlen (str) + 1); ++ ++ for (i = 0; str[i]; i++) ++ { ++ char cnt; ++ ++ cnt = str[i]; ++ if (isspace (cnt) || (ispunct (cnt) && cnt != '@')) ++ { ++ if (first != false) ++ { ++ changed = true; ++ continue; ++ } ++ if (cnt != ' ') ++ { ++ cnt = ' '; ++ changed = true; ++ } ++ first = true; ++ } ++ else ++ first = false; ++ ++ outbuf[strippedlen++] = cnt; ++ } ++ ++ if (!changed) ++ { ++ free (outbuf); ++ return NULL; ++ } ++ ++ outbuf[strippedlen] = '\0'; ++ return outbuf; ++} ++ + /* Write STRING to stdout, replace unprintable characters with '?' 
*/ + static void + write_quoted (const char *string) +@@ -631,28 +685,32 @@ handle_path (const char *path, int *visible) + else + matching = path; + if (!string_matches_pattern (matching)) +-#if !HAVE_ICONV +- goto done; +-#else + { ++ char *altered_matching; + bool matched; + ++ altered_matching = NULL; + matched = false; ++ if (conf_ingore_separators != false) ++ altered_matching = compress_string_separators (matching); ++#if HAVE_ICONV + if (conf_transliterate != false) + { +- char *transliterated; +- +- transliterated = transliterate_string (matching); +- if (transliterated) +- { +- matched = string_matches_pattern (transliterated); +- free (transliterated); +- } ++ char *old_altered = altered_matching; ++ if (altered_matching) ++ matching = altered_matching; ++ altered_matching = transliterate_string (matching); ++ free (old_altered); ++ } ++#endif ++ if (altered_matching != NULL) ++ { ++ matched = string_matches_pattern (altered_matching); ++ free (altered_matching); + } + if (!matched) + goto done; + } +-#endif + /* Visible? 
*/ + if (*visible == -1) + *visible = check_directory_perms (path) == 0; +@@ -856,6 +914,8 @@ help (void) + " -t, --transliterate ignore accents using iconv " + "transliteration when matching patterns\n" + #endif ++ " -p, --ignore-spaces ignore punctuation and spaces when " ++ "matching patterns\n" + " -l, --limit, -n LIMIT limit output (or counting) to LIMIT " + "entries\n" + " -m, --mmap ignored, for backward compatibility\n" +@@ -916,7 +976,7 @@ parse_options (int argc, char *argv[]) + { + int opt, idx; + +- opt = getopt_long (argc, argv, "0AHPLSVbcd:ehitl:mn:qr:sw", options, &idx); ++ opt = getopt_long (argc, argv, "0AHPLSVbcd:ehitpl:mn:qr:sw", options, &idx); + switch (opt) + { + case -1: +@@ -1001,6 +1061,10 @@ parse_options (int argc, char *argv[]) + conf_transliterate = true; + break; + ++ case 'p': ++ conf_ingore_separators = true; ++ break; ++ + case 'l': case 'n': + { + char *end; +@@ -1051,6 +1115,9 @@ parse_options (int argc, char *argv[]) + error (EXIT_FAILURE, 0, + _("non-option arguments are not allowed with --%s"), + conf_statistics != false ? 
"statistics" : "regexp"); ++ if (conf_ingore_separators != false && conf_match_regexp != false) ++ error (EXIT_FAILURE, 0, ++ _("ignore-spaces is not supported when using regexp")); + if (conf_transliterate != false) + { + #if HAVE_ICONV +@@ -1076,21 +1143,30 @@ parse_arguments (int argc, char *argv[]) + string_list_append (&conf_patterns, argv[i]); + if (conf_statistics == false && conf_patterns.len == 0) + error (EXIT_FAILURE, 0, _("no pattern to search for specified")); +-#if HAVE_ICONV +- if (conf_transliterate != false) ++ if (conf_transliterate != false || conf_ingore_separators != false) + { ++ char *altered_pattern; + size_t patterns_len = conf_patterns.len; +- char *transliterated; + + for (i = 0; i < patterns_len; i++) + { +- transliterated = transliterate_string (conf_patterns.entries[i]); +- +- if (transliterated) +- string_list_append (&conf_patterns, transliterated); ++ if (conf_ingore_separators != false) ++ { ++ altered_pattern = ++ compress_string_separators (conf_patterns.entries[i]); ++ if (altered_pattern) ++ conf_patterns.entries[i] = altered_pattern; ++ } ++#if HAVE_ICONV ++ if (conf_transliterate != false) ++ { ++ altered_pattern = transliterate_string (conf_patterns.entries[i]); ++ if (altered_pattern) ++ string_list_append (&conf_patterns, altered_pattern); ++ } ++#endif + } + } +-#endif + conf_patterns.entries = xnrealloc (conf_patterns.entries, conf_patterns.len, + sizeof (*conf_patterns.entries)); + if (conf_match_regexp != false) +-- +2.7.4 + only in patch2: unchanged: --- mlocate-0.26.orig/locate-add-transliterate-support.patch +++ mlocate-0.26/locate-add-transliterate-support.patch @@ -0,0 +1,452 @@ +From ecbf0283545f9955018a4e986c811983971ddd7c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= +Date: Fri, 23 Feb 2018 23:37:48 +0100 +Subject: [PATCH] locate: add --transliterate support using iconv to match + accented +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 
8bit + +When enabled, all search parameters and paths are transliterated +and used for matching. + +There's no need to check transliterated strings all the time if the +targets don't need transliteration. + +To avoid false positives and not to break regex (in advanced mode +when a '?' is present, we had to escape it otherwise). + +We can be sure that the transliterated string will be +at least as long as the input source plus an extra computed +from the number of chars that might be converted, as there's +no transliteration that takes so much space. + +In case we don't have enough space for the transliterated string +we can just reallocate some space for it; so far this is only +needed when chars such as `㎯' are found, as they use 4 bytes but +the transliteration is 7 bytes. As we can't make assumptions +about what the maximum transliteration length will be, we can +just repeatedly reallocate if needed. + +As per C-translit.h many chars could generate combinations +where regex meta chars are used. +For example: ⓪, ﬩,{ → ㎯, ﹙∗ +In such cases, we need to escape the regex properly. +--- + AUTHORS | 1 + + configure.ac | 9 ++ + doc/locate.1.in | 7 +- + src/locate.c | 263 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- + 4 files changed, 278 insertions(+), 2 deletions(-) + +diff --git a/AUTHORS b/AUTHORS +index ade5106..8457862 100644 +--- a/AUTHORS ++++ b/AUTHORS +@@ -1 +1,2 @@ + Miloslav Trmac ++Marco Trevisan +diff --git a/configure.ac b/configure.ac +index 2c587ad..162f55c 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -39,6 +39,15 @@ gl_INIT + AM_GNU_GETTEXT([external], [need-ngettext]) + AM_GNU_GETTEXT_VERSION([0.18.2]) + ++AC_ARG_ENABLE(iconv, ++ AC_HELP_STRING([--disable-iconv], ++ [disable iconv support]),, ++ enable_iconv=yes) ++ ++if test x$enable_iconv = xyes; then ++ AM_ICONV ++fi ++ + # Checks for header files. + + # Checks for types.
+diff --git a/doc/locate.1.in b/doc/locate.1.in +index b8d2826..9c2e629 100644 +--- a/doc/locate.1.in ++++ b/doc/locate.1.in +@@ -126,6 +126,10 @@ and exit successfully. + Ignore case distinctions when matching patterns. + + .TP ++\fB\-t\fR, \fB\-\-transliterate\fR ++Ignore accents using iconv transliteration when matching patterns. ++ ++.TP + \fB\-l\fR, \fB\-\-limit\fR, \fB\-n\fR \fILIMIT\fR + Exit successfully after finding + .I LIMIT +@@ -267,4 +271,5 @@ but it is added to other databases in this implementation and \fBslocate\fR. + Miloslav Trmac + + .SH SEE ALSO +-.BR updatedb (8) ++.BR updatedb (8), ++.BR iconv (1), +diff --git a/src/locate.c b/src/locate.c +index 87f2c15..df18a0d 100644 +--- a/src/locate.c ++++ b/src/locate.c +@@ -22,6 +22,10 @@ Author: Miloslav Trmac */ + #include + #include + #include ++#if HAVE_ICONV ++#include ++#include ++#endif + #include + #include + #include +@@ -47,6 +51,9 @@ Author: Miloslav Trmac */ + #include "db.h" + #include "lib.h" + ++#define BASIC_REGEX_META_CHARS ".^$*[]\\-" ++#define EXTENDED_REGEX_META_CHARS BASIC_REGEX_META_CHARS "{}|+?()" ++ + /* Check file existence before reporting them */ + static bool conf_check_existence; /* = false; */ + +@@ -60,6 +67,9 @@ static struct string_list conf_dbpath; /* = { 0, }; */ + /* Ignore case when matching patterns */ + static bool conf_ignore_case; /* = false; */ + ++/* Ignore accents when matching patterns */ ++static bool conf_transliterate; /* = false; */ ++ + /* Return only files that match all patterns */ + static bool conf_match_all_patterns; /* = false; */ + +@@ -108,6 +118,11 @@ static bool conf_quiet; /* = false; */ + /* Output only statistics */ + static bool conf_statistics; /* = false; */ + ++#if HAVE_ICONV ++/* Iconv context for transliterate conversion */ ++static iconv_t iconv_context; /* = NULL; */ ++#endif ++ + /* String utilities */ + + /* Convert SRC to upper-case wide string in OBSTACK; +@@ -163,6 +178,190 @@ uppercase_string (struct obstack *obstack, const 
char *src)
+   return res;
+ }
+ 
++#if HAVE_ICONV
++/* Return true if C is a meta character of the regex flavour selected by
++   conf_match_regexp_basic, i.e. if it must be escaped to match literally.
++   NUL is never reported: strchr would otherwise match the terminator of
++   the meta-character string. */
++static bool
++char_needs_escape (const char c)
++{
++  const char *meta_chars;
++
++  if (c == '\0')
++    return false;
++
++  meta_chars = (conf_match_regexp_basic != false
++                ? BASIC_REGEX_META_CHARS : EXTENDED_REGEX_META_CHARS);
++  return strchr (meta_chars, c) != NULL;
++}
++
++/* Return a newly allocated, NUL-terminated copy of the first LEN bytes of
++   STR (stopping early at a NUL) in which every regex meta character is
++   preceded by a backslash.  If ESCAPED_LEN is not NULL, store the length
++   of the result there (0 when NULL is returned).
++   Return NULL if regex matching is disabled or nothing had to be escaped.
++   The caller must free the result. */
++static char *
++escape_regex (const char *str, size_t len, size_t *escaped_len)
++{
++  size_t i, j;
++  size_t srclen;
++  size_t newlen;
++  char *outbuf;
++
++  if (escaped_len)
++    *escaped_len = 0;
++
++  if (conf_match_regexp != true)
++    return NULL;
++
++  /* Check the bound before reading: STR need not be NUL-terminated. */
++  newlen = 0;
++  for (srclen = 0; srclen < len && str[srclen]; ++srclen)
++    newlen += char_needs_escape (str[srclen]) ? 2 : 1;
++
++  /* Each escaped char adds one byte, so equal lengths mean no meta char
++     (this also covers the empty input). */
++  if (newlen == srclen)
++    return NULL;
++
++  outbuf = xmalloc (newlen + 1);
++
++  for (i = 0, j = 0; i < srclen; ++i)
++    {
++      if (char_needs_escape (str[i]))
++        outbuf[j++] = '\\';
++      outbuf[j++] = str[i];
++    }
++  outbuf[j] = '\0';
++
++  if (escaped_len)
++    *escaped_len = newlen;
++
++  return outbuf;
++}
++
++/* Make sure that at least NEEDED bytes, plus one for the terminating NUL,
++   are available after *OUTPTR in the buffer *OUTBUF.  When they are not,
++   reallocate the buffer and update *OUTBUF, *OUTPTR and *OUTLEFT. */
++static void
++reserve_output (char **outbuf, char **outptr, size_t *outleft, size_t needed)
++{
++  size_t used;
++
++  if (*outleft >= needed)
++    return;
++
++  used = *outptr - *outbuf;
++  *outbuf = xrealloc (*outbuf, used + needed + 1);
++  *outptr = *outbuf + used;
++  *outleft = needed;
++}
++
++/* Use iconv to transliterate STR into ASCII chars, when possible.
++   If a transliteration does not exist for a symbol, the symbol itself is
++   kept not to lose precision.  In regex mode, meta characters produced by
++   a transliteration are escaped.
++   Return a newly allocated string that the caller must free, or NULL if
++   STR has no non-ASCII byte, nothing was transliterated or iconv failed. */
++static char *
++transliterate_string (const char *str)
++{
++  size_t strrlen;
++  size_t inlen;
++  size_t outleft;
++  size_t transliteratedlen;
++  size_t nonasciibytes;
++  size_t i;
++  bool changed;
++  char *inbuf;
++  char *outbuf;
++  char *outptr;
++
++  changed = false;
++  nonasciibytes = 0;
++  strrlen = 0;
++
++  for (i = 0; str[i]; i++)
++    {
++      if (str[i] & 0x80)
++        ++nonasciibytes;
++
++      ++strrlen;
++    }
++
++  if (nonasciibytes < 1)
++    return NULL;
++
++  inbuf = (char *) str;
++  inlen = 1;
++  transliteratedlen = 0;
++  /* First guess only, the buffer is grown on demand.  OUTLEFT never
++     counts the extra byte kept for the terminating NUL. */
++  outleft = strrlen + nonasciibytes;
++  outbuf = xmalloc (outleft + 1);
++  outptr = outbuf;
++
++  /* Feed iconv one symbol at a time: start with a single byte and widen
++     the input window until iconv accepts it. */
++  while (inbuf + inlen <= str + strrlen)
++    {
++      size_t convertedlen;
++      size_t conversions;
++      size_t symbollen;
++
++      symbollen = inlen;
++      conversions = iconv (iconv_context, &inbuf, &inlen, &outptr, &outleft);
++      convertedlen = (outptr - outbuf) - transliteratedlen;
++
++      if (conversions == (size_t) -1)
++        {
++          if (errno == EILSEQ || errno == EINVAL)
++            {
++              /* NOTE(review): an invalid (not just incomplete) sequence
++                 keeps widening the window up to the end of STR, so the
++                 rest of the input is dropped -- confirm this is wanted. */
++              inlen += 1;
++              continue;
++            }
++          else if (errno == E2BIG)
++            {
++              reserve_output (&outbuf, &outptr, &outleft, outleft + 5);
++              continue;
++            }
++          error (0, errno, _("Impossible to transliterate string %s"), str);
++          changed = false;
++          break;
++        }
++      else if (conversions == 1 && convertedlen == 1 && outptr[-1] == '?')
++        {
++          /* Transliteration is not possible for this symbol, so we just
++             reuse it as it is.  The '?' already took one byte; make sure
++             the remaining ones fit before overwriting it. */
++          reserve_output (&outbuf, &outptr, &outleft, symbollen - 1);
++          memcpy (outptr - 1, inbuf - symbollen, symbollen);
++          convertedlen = symbollen;
++          outptr += symbollen - 1;
++          outleft -= symbollen - 1;
++        }
++      else if (conversions > 0)
++        {
++          if (conf_match_regexp != false && convertedlen > 0)
++            {
++              char *escaped;
++              size_t escaped_len;
++
++              escaped = escape_regex (outptr - convertedlen, convertedlen,
++                                      &escaped_len);
++              if (escaped)
++                {
++                  size_t extra;
++
++                  /* A non-NULL result is always longer than its source. */
++                  extra = escaped_len - convertedlen;
++                  reserve_output (&outbuf, &outptr, &outleft, extra);
++                  memcpy (outptr - convertedlen, escaped, escaped_len);
++                  free (escaped);
++
++                  outptr += extra;
++                  outleft -= extra;
++                  convertedlen = escaped_len;
++                }
++            }
++          changed = true;
++        }
++      transliteratedlen += convertedlen;
++      inlen = 1;
++    }
++
++  if (changed != true)
++    {
++      free (outbuf);
++      return NULL;
++    }
++
++  outbuf[transliteratedlen] = '\0';
++  return outbuf;
++}
++#endif
++
+ /* Write STRING to stdout, replace unprintable characters with '?' */
+ static void
+ write_quoted (const char *string)
+@@ -432,7 +631,28 @@ handle_path (const char *path, int *visible)
+   else
+     matching = path;
+   if (!string_matches_pattern (matching))
++#if !HAVE_ICONV
+     goto done;
++#else
++    {
++      bool matched;
++
++      matched = false;
++      if (conf_transliterate != false)
++        {
++          char *transliterated;
++
++          transliterated = transliterate_string (matching);
++          if (transliterated)
++            {
++              matched = string_matches_pattern (transliterated);
++              free (transliterated);
++            }
++        }
++      if (!matched)
++        goto done;
++    }
++#endif
+   /* Visible? 
*/
+   if (*visible == -1)
+     *visible = check_directory_perms (path) == 0;
+@@ -632,6 +852,10 @@ help (void)
+ 	    " -h, --help print this help\n"
+ 	    " -i, --ignore-case ignore case distinctions when matching "
+ 	    "patterns\n"
++#if HAVE_ICONV
++	    " -t, --transliterate ignore accents using iconv "
++	    "transliteration when matching patterns\n"
++#endif
+ 	    " -l, --limit, -n LIMIT limit output (or counting) to LIMIT "
+ 	    "entries\n"
+ 	    " -m, --mmap ignored, for backward compatibility\n"
+@@ -669,6 +893,7 @@ parse_options (int argc, char *argv[])
+       { "follow", no_argument, NULL, 'L' },
+       { "help", no_argument, NULL, 'h' },
+       { "ignore-case", no_argument, NULL, 'i' },
++      { "transliterate", no_argument, NULL, 't' },
+       { "limit", required_argument, NULL, 'l' },
+       { "mmap", no_argument, NULL, 'm' },
+       { "quiet", no_argument, NULL, 'q' },
+@@ -691,7 +916,7 @@ parse_options (int argc, char *argv[])
+     {
+       int opt, idx;
+ 
+-      opt = getopt_long (argc, argv, "0AHPLSVbcd:ehil:mn:qr:sw", options, &idx);
++      opt = getopt_long (argc, argv, "0AHPLSVbcd:ehitl:mn:qr:sw", options, &idx);
+       switch (opt)
+ 	{
+ 	case -1:
+@@ -772,6 +997,10 @@ parse_options (int argc, char *argv[])
+ 	  conf_ignore_case = true;
+ 	  break;
+ 
++	case 't':
++	  conf_transliterate = true;
++	  break;
++
+ 	case 'l': case 'n':
+ 	  {
+ 	    char *end;
+@@ -822,6 +1051,19 @@ parse_options (int argc, char *argv[])
+     error (EXIT_FAILURE, 0,
+ 	   _("non-option arguments are not allowed with --%s"),
+ 	   conf_statistics != false ? "statistics" : "regexp");
++  /* Open the iconv context once here; it is reused for every pattern
++     and path that gets transliterated. */
++  if (conf_transliterate != false)
++    {
++#if HAVE_ICONV
++      iconv_context = iconv_open ("ASCII//TRANSLIT", nl_langinfo (CODESET));
++      if (iconv_context == (iconv_t) -1)
++	error (EXIT_FAILURE, errno, _("can not do transliteration between " \
++				      "these locales: `%s' and `ASCII'"),
++	       nl_langinfo (CODESET));
++#else
++      /* No call failed here, so there is no errno worth reporting. */
++      error (EXIT_FAILURE, 0, _("transliteration support is not supported " \
++				"by this build of %s"), program_name);
++#endif
++    }
+ }
+ 
+ /* Parse arguments in ARGC, ARGV. Exit on error. 
*/ +@@ -834,6 +1076,21 @@ parse_arguments (int argc, char *argv[]) + string_list_append (&conf_patterns, argv[i]); + if (conf_statistics == false && conf_patterns.len == 0) + error (EXIT_FAILURE, 0, _("no pattern to search for specified")); ++#if HAVE_ICONV ++ if (conf_transliterate != false) ++ { ++ size_t patterns_len = conf_patterns.len; ++ char *transliterated; ++ ++ for (i = 0; i < patterns_len; i++) ++ { ++ transliterated = transliterate_string (conf_patterns.entries[i]); ++ ++ if (transliterated) ++ string_list_append (&conf_patterns, transliterated); ++ } ++ } ++#endif + conf_patterns.entries = xnrealloc (conf_patterns.entries, conf_patterns.len, + sizeof (*conf_patterns.entries)); + if (conf_match_regexp != false) +@@ -1042,6 +1299,10 @@ main (int argc, char *argv[]) + handle_dbpath_entry (conf_dbpath.entries[i]); + } + done: ++#if HAVE_ICONV ++ if (conf_transliterate != false && iconv_context) ++ iconv_close (iconv_context); ++#endif + if (conf_output_count != false) + printf ("%ju\n", matches_found); + if (conf_statistics != false || matches_found != 0) +-- +2.7.4 +