diff --git a/configure.ac b/configure.ac
index 0c001b1..092581d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -313,6 +313,24 @@ fi
 
 AM_CONDITIONAL(ENABLE_PYTHON, test x"$enable_python" = "xyes")
 
+AC_ARG_ENABLE([uchardet],
+	AS_HELP_STRING([--enable-uchardet[=@<:@no/auto/yes@:>@]],[Build with uchardet support]),
+	[enable_uchardet=$enableval],
+	[enable_uchardet="auto"])
+
+if test "x$enable_uchardet" = "xauto"; then
+	PKG_CHECK_EXISTS([uchardet], [enable_uchardet=yes],[enable_uchardet=no])
+fi
+
+if test "x$enable_uchardet" = "xyes"; then
+	PKG_CHECK_MODULES(UCHARDET, [uchardet])
+	AC_SUBST(UCHARDET_CFLAGS)
+	AC_SUBST(UCHARDET_LIBS)
+	AC_DEFINE(ENABLE_UCHARDET, 1, [Define to 1 to use uchardet for encoding detection])
+fi
+
+AM_CONDITIONAL(ENABLE_UCHARDET, test x"$enable_uchardet" = "xyes")
+
 dnl ================================================================
 dnl Start of pkg-config checks
 dnl ================================================================
diff --git a/gedit/Makefile.am b/gedit/Makefile.am
index a386e07..514f3bd 100644
--- a/gedit/Makefile.am
+++ b/gedit/Makefile.am
@@ -238,6 +238,11 @@ if !ENABLE_GVFS_METADATA
 libgedit_la_SOURCES += gedit-metadata-manager.c
 endif
 
+if ENABLE_UCHARDET
+INCLUDES += $(UCHARDET_CFLAGS)
+libgedit_la_LIBADD += $(UCHARDET_LIBS)
+endif
+
 if !PLATFORM_WIN32
 libgedit_la_SOURCES += gedit-fifo.c
 NOINST_H_FILES += gedit-fifo.h
diff --git a/gedit/gedit-document-output-stream.c b/gedit/gedit-document-output-stream.c
index ebc8f80..346ed17 100644
--- a/gedit/gedit-document-output-stream.c
+++ b/gedit/gedit-document-output-stream.c
@@ -30,6 +30,11 @@
 #include "gedit-document-output-stream.h"
 #include "gedit-debug.h"
 
+#ifdef ENABLE_UCHARDET
+#include "uchardet.h"
+#include "gedit-encodings.h"
+#endif
+
 /* NOTE: never use async methods on this stream, the stream is just
  * a wrapper around GtkTextBuffer api so that we can use GIO Stream
  * methods, but the undelying code operates on a GtkTextBuffer, so
@@ -355,6 +360,54 @@ try_convert (GCharsetConverter *converter,
 	return ret;
 }
 
+#ifdef ENABLE_UCHARDET
+/*
+ * detect_encoding:
+ * @stream: stream whose list of candidate encodings may be extended
+ * @inbuf: raw bytes to analyse
+ * @inbuf_size: number of bytes in @inbuf
+ *
+ * Runs uchardet over @inbuf.  If it reports a charset that gedit knows and
+ * that is not yet in stream->priv->encodings, that encoding is prepended to
+ * the list.  Otherwise the list is left untouched.
+ */
+static void
+detect_encoding (GeditDocumentOutputStream *stream,
+		 const void                *inbuf,
+		 gsize                      inbuf_size)
+{
+	uchardet_t ud;
+	const char *charset;
+	const GeditEncoding *enc = NULL;
+
+	ud = uchardet_new ();
+	if (ud == NULL)
+		return;
+
+	/* uchardet_handle_data() returns non-zero when it fails */
+	if (uchardet_handle_data (ud, inbuf, inbuf_size) == 0)
+	{
+		uchardet_data_end (ud);
+
+		/* The returned name is owned by the detector and is empty when
+		 * nothing was detected, so look it up before deleting ud. */
+		charset = uchardet_get_charset (ud);
+		if (charset != NULL && *charset != '\0')
+			enc = gedit_encoding_get_from_charset (charset);
+	}
+
+	uchardet_delete (ud);
+
+	/* nothing usable detected, or the encoding is already a candidate */
+	if (enc == NULL ||
+	    g_slist_find (stream->priv->encodings, enc) != NULL)
+		return;
+
+	stream->priv->encodings = g_slist_prepend (stream->priv->encodings,
+						   (gpointer) enc);
+}
+#endif
+
 static GCharsetConverter *
 guess_encoding (GeditDocumentOutputStream *stream,
 		const void                *inbuf,
@@ -368,6 +421,10 @@ guess_encoding (GeditDocumentOutputStream *stream,
 		return NULL;
 	}
 
+#ifdef ENABLE_UCHARDET
+	detect_encoding (stream, inbuf, inbuf_size);
+#endif
+
 	if (stream->priv->encodings != NULL &&
 	    stream->priv->encodings->next == NULL)
 	{