diff -Nru zlib-1.2.13.dfsg/debian/changelog zlib-1.2.13.dfsg/debian/changelog --- zlib-1.2.13.dfsg/debian/changelog 2022-11-15 18:06:45.000000000 +0100 +++ zlib-1.2.13.dfsg/debian/changelog 2022-11-21 20:28:58.000000000 +0100 @@ -1,3 +1,17 @@ +zlib (1:1.2.13.dfsg-1ubuntu3) lunar; urgency=medium + + * Re-add vectorized crc32 support for s390x by adding + d/p/s390x-vectorize-crc32.patch + (crc32vx-v4: s390x: vectorize crc32). (LP: #1998470) + This replaces the previously dropped patch: + lp1932010-ibm-z-add-vectorized-crc32-implementation.patch + * Remove option '--crc32-vx' for s390x in d/rules, that was previously just + commented out, since it's no longer needed with the new s390x crc32 code. + * Update d/p/410.patch to version 26f2c0a4e17e5558d779797d713aa37ebaeef390 + due to unused "const char *endptr;". + + -- Frank Heimes Mon, 21 Nov 2022 20:28:58 +0100 + zlib (1:1.2.13.dfsg-1ubuntu2) lunar; urgency=medium * Comment out use of --crc32-vx on s390x, since this is currently not diff -Nru zlib-1.2.13.dfsg/debian/patches/410.patch zlib-1.2.13.dfsg/debian/patches/410.patch --- zlib-1.2.13.dfsg/debian/patches/410.patch 2022-11-08 00:57:28.000000000 +0100 +++ zlib-1.2.13.dfsg/debian/patches/410.patch 2022-11-21 20:28:58.000000000 +0100 @@ -1,4 +1,4 @@ -From 9979abaceb606a8a3e6d897873d174eca7a30a5f Mon Sep 17 00:00:00 2001 +From 26f2c0a4e17e5558d779797d713aa37ebaeef390 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 18 Jul 2018 13:14:07 +0200 Subject: [PATCH] Add support for IBM Z hardware-accelerated deflate @@ -94,51 +94,14 @@ Since the first call to dfltcc_inflate already needs the window, and it might be not allocated yet, inflate_ensure_window was factored out of updatewindow and made ZLIB_INTERNAL. 
---- - Makefile.in | 8 + - compress.c | 14 +- - configure | 24 + - contrib/README.contrib | 4 + - contrib/s390/README.txt | 17 + - contrib/s390/dfltcc.c | 996 ++++++++++++++++++++++++++++++++++ - contrib/s390/dfltcc.h | 81 +++ - contrib/s390/dfltcc_deflate.h | 55 ++ - deflate.c | 82 ++- - deflate.h | 12 + - gzguts.h | 4 + - inflate.c | 87 ++- - inflate.h | 2 + - test/infcover.c | 4 +- - test/minigzip.c | 4 + - trees.c | 13 +- - zutil.h | 2 + - 17 files changed, 1349 insertions(+), 60 deletions(-) - create mode 100644 contrib/s390/README.txt - create mode 100644 contrib/s390/dfltcc.c - create mode 100644 contrib/s390/dfltcc.h - create mode 100644 contrib/s390/dfltcc_deflate.h -diff --git a/Makefile.in b/Makefile.in -index 7d2713f..dca62f0 100644 ---- a/Makefile.in -+++ b/Makefile.in -@@ -139,6 +139,14 @@ match.lo: match.S - mv _match.o match.lo - rm -f _match.s - -+dfltcc.o: $(SRCDIR)contrib/s390/dfltcc.c $(SRCDIR)zlib.h zconf.h -+ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390/dfltcc.c -+ -+dfltcc.lo: $(SRCDIR)contrib/s390/dfltcc.c $(SRCDIR)zlib.h zconf.h -+ -@mkdir objs 2>/dev/null || test -d objs -+ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/dfltcc.o $(SRCDIR)contrib/s390/dfltcc.c -+ -@mv objs/dfltcc.o $@ -+ - example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h - $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/example.c - -diff --git a/compress.c b/compress.c -index 2ad5326..179ee27 100644 +Author: Ilya Leoshkevich +Origin: upstream, https://github.com/iii-i/zlib/commit/26f2c0a4e17e5558d779797d713aa37ebaeef390 +Forwarded: not-needed +Reviewed-by: Frank Heimes +Last-Update: 2022-11-30 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ --- a/compress.c +++ b/compress.c @@ -5,9 +5,15 @@ @@ -158,7 +121,7 @@ /* =========================================================================== Compresses the source buffer into the destination buffer. The level parameter has the same meaning as in deflateInit. 
sourceLen is the byte -@@ -81,6 +87,12 @@ int ZEXPORT compress(dest, destLen, source, sourceLen) +@@ -81,6 +87,12 @@ uLong ZEXPORT compressBound(sourceLen) uLong sourceLen; { @@ -171,11 +134,9 @@ return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + (sourceLen >> 25) + 13; } -diff --git a/configure b/configure -index fa4d5da..c7dc2a9 100755 --- a/configure +++ b/configure -@@ -118,6 +118,7 @@ case "$1" in +@@ -118,6 +118,7 @@ echo ' configure [--const] [--zprefix] [--prefix=PREFIX] [--eprefix=EXPREFIX]' | tee -a configure.log echo ' [--static] [--64] [--libdir=LIBDIR] [--sharedlibdir=LIBDIR]' | tee -a configure.log echo ' [--includedir=INCLUDEDIR] [--archs="-arch i386 -arch x86_64"]' | tee -a configure.log @@ -183,7 +144,7 @@ exit 0 ;; -p*=* | --prefix=*) prefix=`echo $1 | sed 's/.*=//'`; shift ;; -e*=* | --eprefix=*) exec_prefix=`echo $1 | sed 's/.*=//'`; shift ;; -@@ -142,6 +143,16 @@ case "$1" in +@@ -142,6 +143,16 @@ -w* | --warn) warn=1; shift ;; -d* | --debug) debug=1; shift ;; --sanitize) sanitize=1; shift ;; @@ -200,7 +161,7 @@ *) echo "unknown option: $1" | tee -a configure.log echo "$0 --help for help" | tee -a configure.log -@@ -828,6 +839,19 @@ EOF +@@ -828,6 +839,19 @@ fi fi @@ -220,11 +181,9 @@ # show the results in the log echo >> configure.log echo ALL = $ALL >> configure.log -diff --git a/contrib/README.contrib b/contrib/README.contrib -index 5e5f950..21bd729 100644 --- a/contrib/README.contrib +++ b/contrib/README.contrib -@@ -46,6 +46,10 @@ puff/ by Mark Adler +@@ -46,6 +46,10 @@ Small, low memory usage inflate. Also serves to provide an unambiguous description of the deflate format. 
@@ -235,35 +194,9 @@ testzlib/ by Gilles Vollant Example of the use of zlib -diff --git a/contrib/s390/README.txt b/contrib/s390/README.txt -new file mode 100644 -index 0000000..48be008 ---- /dev/null -+++ b/contrib/s390/README.txt -@@ -0,0 +1,17 @@ -+IBM Z mainframes starting from version z15 provide DFLTCC instruction, -+which implements deflate algorithm in hardware with estimated -+compression and decompression performance orders of magnitude faster -+than the current zlib and ratio comparable with that of level 1. -+ -+This directory adds DFLTCC support. In order to enable it, the following -+build commands should be used: -+ -+ $ ./configure --dfltcc -+ $ make -+ -+When built like this, zlib would compress in hardware on level 1, and in -+software on all other levels. Decompression will always happen in -+hardware. In order to enable DFLTCC compression for levels 1-6 (i.e. to -+make it used by default) one could either configure with -+--dfltcc-level-mask=0x7e or set the environment variable -+DFLTCC_LEVEL_MASK to 0x7e at run time. -diff --git a/contrib/s390/dfltcc.c b/contrib/s390/dfltcc.c -new file mode 100644 -index 0000000..cd95929 --- /dev/null +++ b/contrib/s390/dfltcc.c -@@ -0,0 +1,996 @@ +@@ -0,0 +1,995 @@ +/* dfltcc.c - SystemZ DEFLATE CONVERSION CALL support. 
*/ + +/* @@ -968,7 +901,6 @@ +__attribute__((constructor)) local void init_globals OF((void)); +__attribute__((constructor)) local void init_globals(void) +{ -+ const char *endptr; + const char *env; + register char r0 __asm__("r0"); + @@ -1260,9 +1192,64 @@ + *dict_length = param->hl; + return Z_OK; +} -diff --git a/contrib/s390/dfltcc.h b/contrib/s390/dfltcc.h -new file mode 100644 -index 0000000..da26612 +--- /dev/null ++++ b/contrib/s390/dfltcc_deflate.h +@@ -0,0 +1,55 @@ ++#ifndef DFLTCC_DEFLATE_H ++#define DFLTCC_DEFLATE_H ++ ++#include "dfltcc.h" ++ ++int ZLIB_INTERNAL dfltcc_can_deflate OF((z_streamp strm)); ++int ZLIB_INTERNAL dfltcc_deflate OF((z_streamp strm, ++ int flush, ++ block_state *result)); ++int ZLIB_INTERNAL dfltcc_deflate_params OF((z_streamp strm, ++ int level, ++ int strategy, ++ int *flush)); ++int ZLIB_INTERNAL dfltcc_deflate_done OF((z_streamp strm, int flush)); ++int ZLIB_INTERNAL dfltcc_deflate_set_dictionary OF((z_streamp strm, ++ const Bytef *dictionary, ++ uInt dict_length)); ++int ZLIB_INTERNAL dfltcc_deflate_get_dictionary OF((z_streamp strm, ++ Bytef *dictionary, ++ uInt* dict_length)); ++ ++#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ ++ do { \ ++ if (dfltcc_can_deflate((strm))) \ ++ return dfltcc_deflate_set_dictionary((strm), (dict), (dict_len)); \ ++ } while (0) ++#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ ++ do { \ ++ if (dfltcc_can_deflate((strm))) \ ++ return dfltcc_deflate_get_dictionary((strm), (dict), (dict_len)); \ ++ } while (0) ++#define DEFLATE_RESET_KEEP_HOOK(strm) \ ++ dfltcc_reset((strm), sizeof(deflate_state)) ++#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) \ ++ do { \ ++ int err; \ ++\ ++ err = dfltcc_deflate_params((strm), \ ++ (level), \ ++ (strategy), \ ++ (hook_flush)); \ ++ if (err == Z_STREAM_ERROR) \ ++ return err; \ ++ } while (0) ++#define DEFLATE_DONE dfltcc_deflate_done ++#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \ ++ do { \ ++ if 
(dfltcc_can_deflate((strm))) \ ++ (complen) = DEFLATE_BOUND_COMPLEN(source_len); \ ++ } while (0) ++#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (dfltcc_can_deflate((strm))) ++#define DEFLATE_HOOK dfltcc_deflate ++#define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm))) ++ ++#endif --- /dev/null +++ b/contrib/s390/dfltcc.h @@ -0,0 +1,81 @@ @@ -1347,72 +1334,29 @@ + } while (0) + +#endif -diff --git a/contrib/s390/dfltcc_deflate.h b/contrib/s390/dfltcc_deflate.h -new file mode 100644 -index 0000000..46acfc5 --- /dev/null -+++ b/contrib/s390/dfltcc_deflate.h -@@ -0,0 +1,55 @@ -+#ifndef DFLTCC_DEFLATE_H -+#define DFLTCC_DEFLATE_H -+ -+#include "dfltcc.h" ++++ b/contrib/s390/README.txt +@@ -0,0 +1,17 @@ ++IBM Z mainframes starting from version z15 provide DFLTCC instruction, ++which implements deflate algorithm in hardware with estimated ++compression and decompression performance orders of magnitude faster ++than the current zlib and ratio comparable with that of level 1. + -+int ZLIB_INTERNAL dfltcc_can_deflate OF((z_streamp strm)); -+int ZLIB_INTERNAL dfltcc_deflate OF((z_streamp strm, -+ int flush, -+ block_state *result)); -+int ZLIB_INTERNAL dfltcc_deflate_params OF((z_streamp strm, -+ int level, -+ int strategy, -+ int *flush)); -+int ZLIB_INTERNAL dfltcc_deflate_done OF((z_streamp strm, int flush)); -+int ZLIB_INTERNAL dfltcc_deflate_set_dictionary OF((z_streamp strm, -+ const Bytef *dictionary, -+ uInt dict_length)); -+int ZLIB_INTERNAL dfltcc_deflate_get_dictionary OF((z_streamp strm, -+ Bytef *dictionary, -+ uInt* dict_length)); ++This directory adds DFLTCC support. 
In order to enable it, the following ++build commands should be used: + -+#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ -+ do { \ -+ if (dfltcc_can_deflate((strm))) \ -+ return dfltcc_deflate_set_dictionary((strm), (dict), (dict_len)); \ -+ } while (0) -+#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ -+ do { \ -+ if (dfltcc_can_deflate((strm))) \ -+ return dfltcc_deflate_get_dictionary((strm), (dict), (dict_len)); \ -+ } while (0) -+#define DEFLATE_RESET_KEEP_HOOK(strm) \ -+ dfltcc_reset((strm), sizeof(deflate_state)) -+#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) \ -+ do { \ -+ int err; \ -+\ -+ err = dfltcc_deflate_params((strm), \ -+ (level), \ -+ (strategy), \ -+ (hook_flush)); \ -+ if (err == Z_STREAM_ERROR) \ -+ return err; \ -+ } while (0) -+#define DEFLATE_DONE dfltcc_deflate_done -+#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \ -+ do { \ -+ if (dfltcc_can_deflate((strm))) \ -+ (complen) = DEFLATE_BOUND_COMPLEN(source_len); \ -+ } while (0) -+#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (dfltcc_can_deflate((strm))) -+#define DEFLATE_HOOK dfltcc_deflate -+#define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm))) ++ $ ./configure --dfltcc ++ $ make + -+#endif -diff --git a/deflate.c b/deflate.c -index 4a689db..9fd3bdb 100644 ++When built like this, zlib would compress in hardware on level 1, and in ++software on all other levels. Decompression will always happen in ++hardware. In order to enable DFLTCC compression for levels 1-6 (i.e. to ++make it used by default) one could either configure with ++--dfltcc-level-mask=0x7e or set the environment variable ++DFLTCC_LEVEL_MASK to 0x7e at run time. 
--- a/deflate.c +++ b/deflate.c -@@ -61,15 +61,30 @@ const char deflate_copyright[] = +@@ -61,15 +61,30 @@ */ /* =========================================================================== @@ -1450,7 +1394,7 @@ typedef block_state (*compress_func) OF((deflate_state *s, int flush)); /* Compression function. Returns the block state after the call. */ -@@ -85,7 +100,6 @@ local block_state deflate_rle OF((deflate_state *s, int flush)); +@@ -85,7 +100,6 @@ local block_state deflate_huff OF((deflate_state *s, int flush)); local void lm_init OF((deflate_state *s)); local void putShortMSB OF((deflate_state *s, uInt b)); @@ -1458,7 +1402,7 @@ local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); local uInt longest_match OF((deflate_state *s, IPos cur_match)); -@@ -295,7 +309,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, +@@ -295,7 +309,7 @@ return Z_STREAM_ERROR; } if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ @@ -1467,7 +1411,7 @@ if (s == Z_NULL) return Z_MEM_ERROR; strm->state = (struct internal_state FAR *)s; s->strm = strm; -@@ -312,7 +326,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, +@@ -312,7 +326,7 @@ s->hash_mask = s->hash_size - 1; s->hash_shift = ((s->hash_bits + MIN_MATCH-1) / MIN_MATCH); @@ -1476,7 +1420,7 @@ s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); -@@ -430,6 +444,7 @@ int ZEXPORT deflateSetDictionary(strm, dictionary, dictLength) +@@ -430,6 +444,7 @@ /* when using zlib wrappers, compute Adler-32 for provided dictionary */ if (wrap == 1) strm->adler = adler32(strm->adler, dictionary, dictLength); @@ -1484,7 +1428,7 @@ s->wrap = 0; /* avoid computing Adler-32 in read_buf */ /* if dictionary would fill window, just replace the history */ -@@ -488,6 +503,7 @@ int ZEXPORT deflateGetDictionary(strm, dictionary, dictLength) +@@ -488,6 +503,7 @@ if 
(deflateStateCheck(strm)) return Z_STREAM_ERROR; @@ -1492,7 +1436,7 @@ s = strm->state; len = s->strstart + s->lookahead; if (len > s->w_size) -@@ -534,6 +550,8 @@ int ZEXPORT deflateResetKeep(strm) +@@ -534,6 +550,8 @@ _tr_init(s); @@ -1501,7 +1445,7 @@ return Z_OK; } -@@ -609,6 +627,7 @@ int ZEXPORT deflateParams(strm, level, strategy) +@@ -609,6 +627,7 @@ { deflate_state *s; compress_func func; @@ -1509,7 +1453,7 @@ if (deflateStateCheck(strm)) return Z_STREAM_ERROR; s = strm->state; -@@ -621,15 +640,18 @@ int ZEXPORT deflateParams(strm, level, strategy) +@@ -621,15 +640,18 @@ if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { return Z_STREAM_ERROR; } @@ -1532,7 +1476,7 @@ return Z_BUF_ERROR; } if (s->level != level) { -@@ -705,11 +727,13 @@ uLong ZEXPORT deflateBound(strm, sourceLen) +@@ -705,11 +727,13 @@ ~13% overhead plus a small constant */ fixedlen = sourceLen + (sourceLen >> 3) + (sourceLen >> 8) + (sourceLen >> 9) + 4; @@ -1546,7 +1490,7 @@ /* if can't get parameters, return larger bound plus a zlib wrapper */ if (deflateStateCheck(strm)) -@@ -751,7 +775,8 @@ uLong ZEXPORT deflateBound(strm, sourceLen) +@@ -751,7 +775,8 @@ } /* if not default parameters, return one of the conservative bounds */ @@ -1556,7 +1500,7 @@ return (s->w_bits <= s->hash_bits ? fixedlen : storelen) + wraplen; /* default settings: return tight bound for that case -- ~0.03% overhead -@@ -779,7 +804,7 @@ local void putShortMSB(s, b) +@@ -779,7 +804,7 @@ * applications may wish to modify it to avoid allocating a large * strm->next_out buffer and copying into it. (See also read_buf()). */ @@ -1565,7 +1509,7 @@ z_streamp strm; { unsigned len; -@@ -1051,7 +1076,8 @@ int ZEXPORT deflate(strm, flush) +@@ -1051,7 +1076,8 @@ (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { block_state bstate; @@ -1575,7 +1519,7 @@ s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : s->strategy == Z_RLE ? 
deflate_rle(s, flush) : (*(configuration_table[s->level].func))(s, flush); -@@ -1098,7 +1124,6 @@ int ZEXPORT deflate(strm, flush) +@@ -1098,7 +1124,6 @@ } if (flush != Z_FINISH) return Z_OK; @@ -1583,7 +1527,7 @@ /* Write the trailer */ #ifdef GZIP -@@ -1114,7 +1139,7 @@ int ZEXPORT deflate(strm, flush) +@@ -1114,7 +1139,7 @@ } else #endif @@ -1592,7 +1536,7 @@ putShortMSB(s, (uInt)(strm->adler >> 16)); putShortMSB(s, (uInt)(strm->adler & 0xffff)); } -@@ -1123,7 +1148,11 @@ int ZEXPORT deflate(strm, flush) +@@ -1123,7 +1148,11 @@ * to flush the rest. */ if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ @@ -1605,7 +1549,7 @@ } /* ========================================================================= */ -@@ -1140,9 +1169,9 @@ int ZEXPORT deflateEnd(strm) +@@ -1140,9 +1169,9 @@ TRY_FREE(strm, strm->state->pending_buf); TRY_FREE(strm, strm->state->head); TRY_FREE(strm, strm->state->prev); @@ -1617,7 +1561,7 @@ strm->state = Z_NULL; return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; -@@ -1172,13 +1201,13 @@ int ZEXPORT deflateCopy(dest, source) +@@ -1172,13 +1201,13 @@ zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); @@ -1634,7 +1578,7 @@ ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4); -@@ -1225,7 +1254,8 @@ local unsigned read_buf(strm, buf, size) +@@ -1225,7 +1254,8 @@ strm->avail_in -= len; zmemcpy(buf, strm->next_in, len); @@ -1644,11 +1588,9 @@ strm->adler = adler32(strm->adler, buf, len); } #ifdef GZIP -diff --git a/deflate.h b/deflate.h -index 1a06cd5..f92750c 100644 --- a/deflate.h +++ b/deflate.h -@@ -299,6 +299,7 @@ void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s)); +@@ -299,6 +299,7 @@ void ZLIB_INTERNAL _tr_align OF((deflate_state *s)); void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, int last)); @@ -1656,7 +1598,7 @@ #define d_code(dist) \ ((dist) < 
256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) -@@ -343,4 +344,15 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, +@@ -343,4 +344,15 @@ flush = _tr_tally(s, distance, length) #endif @@ -1672,8 +1614,6 @@ +void ZLIB_INTERNAL flush_pending OF((z_streamp strm)); + #endif /* DEFLATE_H */ -diff --git a/gzguts.h b/gzguts.h -index 57faf37..581f2b6 100644 --- a/gzguts.h +++ b/gzguts.h @@ -153,7 +153,11 @@ @@ -1688,8 +1628,6 @@ /* gzip modes, also provide a little integrity check on the passed structure */ #define GZ_NONE 0 -diff --git a/inflate.c b/inflate.c -index 8acbef4..918dfa7 100644 --- a/inflate.c +++ b/inflate.c @@ -85,6 +85,24 @@ @@ -1717,7 +1655,7 @@ #ifdef MAKEFIXED # ifndef BUILDFIXED # define BUILDFIXED -@@ -138,6 +156,7 @@ z_streamp strm; +@@ -138,6 +156,7 @@ state->lencode = state->distcode = state->next = state->codes; state->sane = 1; state->back = -1; @@ -1725,7 +1663,7 @@ Tracev((stderr, "inflate: reset\n")); return Z_OK; } -@@ -185,7 +204,7 @@ int windowBits; +@@ -185,7 +204,7 @@ if (windowBits && (windowBits < 8 || windowBits > 15)) return Z_STREAM_ERROR; if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { @@ -1734,7 +1672,7 @@ state->window = Z_NULL; } -@@ -224,7 +243,7 @@ int stream_size; +@@ -224,7 +243,7 @@ strm->zfree = zcfree; #endif state = (struct inflate_state FAR *) @@ -1743,7 +1681,7 @@ if (state == Z_NULL) return Z_MEM_ERROR; Tracev((stderr, "inflate: allocated\n")); strm->state = (struct internal_state FAR *)state; -@@ -233,7 +252,7 @@ int stream_size; +@@ -233,7 +252,7 @@ state->mode = HEAD; /* to pass state test in inflateReset2() */ ret = inflateReset2(strm, windowBits); if (ret != Z_OK) { @@ -1752,7 +1690,7 @@ strm->state = Z_NULL; } return ret; -@@ -255,6 +274,7 @@ int value; +@@ -255,6 +274,7 @@ struct inflate_state FAR *state; if (inflateStateCheck(strm)) return Z_STREAM_ERROR; @@ -1760,7 +1698,7 @@ state = (struct inflate_state FAR *)strm->state; if (bits < 0) { state->hold = 0; 
-@@ -382,6 +402,27 @@ void makefixed() +@@ -382,6 +402,27 @@ } #endif /* MAKEFIXED */ @@ -1788,7 +1726,7 @@ /* Update the window with the last wsize (normally 32K) bytes written before returning. If window does not exist yet, create it. This is only called -@@ -406,20 +447,7 @@ unsigned copy; +@@ -406,20 +447,7 @@ state = (struct inflate_state FAR *)strm->state; @@ -1810,7 +1748,7 @@ /* copy state->wsize or less output bytes into the circular window */ if (copy >= state->wsize) { -@@ -863,6 +891,7 @@ int flush; +@@ -863,6 +891,7 @@ if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; /* fallthrough */ case TYPEDO: @@ -1818,7 +1756,7 @@ if (state->last) { BYTEBITS(); state->mode = CHECK; -@@ -1224,7 +1253,7 @@ int flush; +@@ -1224,7 +1253,7 @@ out -= left; strm->total_out += out; state->total += out; @@ -1827,7 +1765,7 @@ strm->adler = state->check = UPDATE_CHECK(state->check, put - out, out); out = left; -@@ -1279,8 +1308,9 @@ int flush; +@@ -1279,8 +1308,9 @@ */ inf_leave: RESTORE(); @@ -1839,7 +1777,7 @@ if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { state->mode = MEM; return Z_MEM_ERROR; -@@ -1290,7 +1320,7 @@ int flush; +@@ -1290,7 +1320,7 @@ strm->total_in += in; strm->total_out += out; state->total += out; @@ -1848,7 +1786,7 @@ strm->adler = state->check = UPDATE_CHECK(state->check, strm->next_out - out, out); strm->data_type = (int)state->bits + (state->last ? 
64 : 0) + -@@ -1308,8 +1338,8 @@ z_streamp strm; +@@ -1308,8 +1338,8 @@ if (inflateStateCheck(strm)) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)strm->state; @@ -1859,7 +1797,7 @@ strm->state = Z_NULL; Tracev((stderr, "inflate: end\n")); return Z_OK; -@@ -1488,6 +1518,7 @@ z_streamp strm; +@@ -1488,6 +1518,7 @@ struct inflate_state FAR *state; if (inflateStateCheck(strm)) return Z_STREAM_ERROR; @@ -1867,7 +1805,7 @@ state = (struct inflate_state FAR *)strm->state; return state->mode == STORED && state->bits == 0; } -@@ -1508,21 +1539,22 @@ z_streamp source; +@@ -1508,21 +1539,22 @@ /* allocate space */ copy = (struct inflate_state FAR *) @@ -1894,7 +1832,7 @@ copy->strm = dest; if (state->lencode >= state->codes && state->lencode <= state->codes + ENOUGH - 1) { -@@ -1579,6 +1611,7 @@ z_streamp strm; +@@ -1579,6 +1611,7 @@ if (inflateStateCheck(strm)) return -(1L << 16); @@ -1902,21 +1840,34 @@ state = (struct inflate_state FAR *)strm->state; return (long)(((unsigned long)((long)state->back)) << 16) + (state->mode == COPY ? 
state->length : -diff --git a/inflate.h b/inflate.h -index f127b6b..519ed35 100644 --- a/inflate.h +++ b/inflate.h -@@ -124,3 +124,5 @@ struct inflate_state { +@@ -124,3 +124,5 @@ int back; /* bits back of last unprocessed length/lit */ unsigned was; /* initial length of match */ }; + +int ZLIB_INTERNAL inflate_ensure_window OF((struct inflate_state *state)); -diff --git a/test/infcover.c b/test/infcover.c -index 2be0164..a208219 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -139,6 +139,14 @@ + mv _match.o match.lo + rm -f _match.s + ++dfltcc.o: $(SRCDIR)contrib/s390/dfltcc.c $(SRCDIR)zlib.h zconf.h ++ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390/dfltcc.c ++ ++dfltcc.lo: $(SRCDIR)contrib/s390/dfltcc.c $(SRCDIR)zlib.h zconf.h ++ -@mkdir objs 2>/dev/null || test -d objs ++ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/dfltcc.o $(SRCDIR)contrib/s390/dfltcc.c ++ -@mv objs/dfltcc.o $@ ++ + example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h + $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/example.c + --- a/test/infcover.c +++ b/test/infcover.c -@@ -373,7 +373,7 @@ local void cover_support(void) +@@ -373,7 +373,7 @@ mem_setup(&strm); strm.avail_in = 0; strm.next_in = Z_NULL; @@ -1925,7 +1876,7 @@ assert(ret == Z_VERSION_ERROR); mem_done(&strm, "wrong version"); -@@ -444,7 +444,7 @@ local void cover_wrap(void) +@@ -444,7 +444,7 @@ } /* input and output functions for inflateBack() */ @@ -1934,11 +1885,9 @@ { static unsigned int next = 0; static unsigned char dat[] = {0x63, 0, 2, 0}; -diff --git a/test/minigzip.c b/test/minigzip.c -index a649d2b..964408a 100644 --- a/test/minigzip.c +++ b/test/minigzip.c -@@ -132,7 +132,11 @@ static void pwinerror (s) +@@ -132,7 +132,11 @@ #endif #define SUFFIX_LEN (sizeof(GZ_SUFFIX)-1) @@ -1950,11 +1899,9 @@ #define MAX_NAME_LEN 1024 #ifdef MAXSEG_64K -diff --git a/trees.c b/trees.c -index 5f305c4..4924bdf 100644 --- a/trees.c +++ b/trees.c -@@ -149,8 +149,6 @@ local void send_all_trees OF((deflate_state *s, int lcodes, 
int dcodes, +@@ -149,8 +149,6 @@ local void compress_block OF((deflate_state *s, const ct_data *ltree, const ct_data *dtree)); local int detect_data_type OF((deflate_state *s)); @@ -1963,7 +1910,7 @@ local void bi_flush OF((deflate_state *s)); #ifdef GEN_TREES_H -@@ -223,6 +221,13 @@ local void send_bits(s, value, length) +@@ -223,6 +221,13 @@ } #endif /* ZLIB_DEBUG */ @@ -1977,7 +1924,7 @@ /* the arguments must not have side effects */ -@@ -1133,7 +1138,7 @@ local int detect_data_type(s) +@@ -1133,7 +1138,7 @@ * method would use a table) * IN assertion: 1 <= len <= 15 */ @@ -1986,7 +1933,7 @@ unsigned code; /* the value to invert */ int len; /* its bit length */ { -@@ -1165,7 +1170,7 @@ local void bi_flush(s) +@@ -1165,7 +1170,7 @@ /* =========================================================================== * Flush the bit buffer and align the output on a byte boundary */ @@ -1995,11 +1942,9 @@ deflate_state *s; { if (s->bi_valid > 8) { -diff --git a/zutil.h b/zutil.h -index 0bc7f4e..75eb4df 100644 --- a/zutil.h +++ b/zutil.h -@@ -87,6 +87,8 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ +@@ -87,6 +87,8 @@ #define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ @@ -2008,6 +1953,3 @@ /* target dependencies */ #if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) --- -2.34.1 - diff -Nru zlib-1.2.13.dfsg/debian/patches/s390x-vectorize-crc32.patch zlib-1.2.13.dfsg/debian/patches/s390x-vectorize-crc32.patch --- zlib-1.2.13.dfsg/debian/patches/s390x-vectorize-crc32.patch 1970-01-01 01:00:00.000000000 +0100 +++ zlib-1.2.13.dfsg/debian/patches/s390x-vectorize-crc32.patch 2022-11-21 20:28:58.000000000 +0100 @@ -0,0 +1,402 @@ +Description: s390x: vectorize crc32 + Use vector extensions when compiling for s390x and binutils knows + about them. At runtime, check whether kernel supports vector + extensions (it has to be not just the CPU, but also the kernel) and + choose between the regular and the vectorized implementations. 
+Author: Ilya Leoshkevich +Origin: https://github.com/iii-i/zlib/commit/05710d5fb8eb1447289ebf11109e149ece95d839 +Bug-Ubuntu: https://bugs.launchpad.net/bugs/1998470 +Forwarded: not-needed +Reviewed-by: Frank Heimes +Last-Update: 2022-11-21 +--- a/Makefile.in ++++ b/Makefile.in +@@ -25,6 +25,7 @@ + TEST_LDFLAGS=$(LDFLAGS) -L. libz.a + LDSHARED=$(CC) + CPP=$(CC) -E ++VGFMAFLAG= + + STATICLIB=libz.a + SHAREDLIB=libz.so +@@ -175,6 +176,9 @@ + crc32_z_power8.o: $(SRCDIR)contrib/power/crc32_z_power8.c + $(CC) $(CFLAGS) -mcpu=power8 $(ZINC) -c -o $@ $(SRCDIR)contrib/power/crc32_z_power8.c + ++crc32-vx.o: $(SRCDIR)contrib/s390/crc32-vx.c ++ $(CC) $(CFLAGS) $(VGFMAFLAG) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390/crc32-vx.c ++ + deflate.o: $(SRCDIR)deflate.c + $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c + +@@ -225,6 +229,11 @@ + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c + -@mv objs/crc32.o $@ + ++crc32-vx.lo: $(SRCDIR)contrib/s390/crc32-vx.c ++ -@mkdir objs 2>/dev/null || test -d objs ++ $(CC) $(SFLAGS) $(VGFMAFLAG) $(ZINC) -DPIC -c -o objs/crc32-vx.o $(SRCDIR)contrib/s390/crc32-vx.c ++ -@mv objs/crc32-vx.o $@ ++ + crc32_z_power8.lo: $(SRCDIR)contrib/power/crc32_z_power8.c + -@mkdir objs 2>/dev/null || test -d objs + $(CC) $(SFLAGS) -mcpu=power8 $(ZINC) -DPIC -c -o objs/crc32_z_power8.o $(SRCDIR)contrib/power/crc32_z_power8.c +--- a/configure ++++ b/configure +@@ -921,6 +921,32 @@ + echo "Checking for Power optimizations support... No." 
| tee -a configure.log + fi + ++# check if we are compiling for s390 and binutils support vector extensions ++VGFMAFLAG=-march=z13 ++cat > $test.c <> configure.log + echo ALL = $ALL >> configure.log +@@ -952,6 +978,7 @@ + echo prefix = $prefix >> configure.log + echo sharedlibdir = $sharedlibdir >> configure.log + echo uname = $uname >> configure.log ++echo VGFMAFLAG = $VGFMAFLAG >> configure.log + + # udpate Makefile with the configure results + sed < ${SRCDIR}Makefile.in " +@@ -961,6 +988,7 @@ + /^LDFLAGS *=/s#=.*#=$LDFLAGS# + /^LDSHARED *=/s#=.*#=$LDSHARED# + /^CPP *=/s#=.*#=$CPP# ++/^VGFMAFLAG *=/s#=.*#=$VGFMAFLAG# + /^STATICLIB *=/s#=.*#=$STATICLIB# + /^SHAREDLIB *=/s#=.*#=$SHAREDLIB# + /^SHAREDLIBV *=/s#=.*#=$SHAREDLIBV# +--- a/contrib/gcc/zifunc.h ++++ b/contrib/gcc/zifunc.h +@@ -8,9 +8,28 @@ + + /* Helpers for arch optimizations */ + ++#if defined(__clang__) ++#if __has_feature(coverage_sanitizer) ++#define Z_IFUNC_NO_SANCOV __attribute__((no_sanitize("coverage"))) ++#else /* __has_feature(coverage_sanitizer) */ ++#define Z_IFUNC_NO_SANCOV ++#endif /* __has_feature(coverage_sanitizer) */ ++#else /* __clang__ */ ++#define Z_IFUNC_NO_SANCOV ++#endif /* __clang__ */ ++ ++#ifdef __s390__ ++#define Z_IFUNC_PARAMS unsigned long hwcap ++#define Z_IFUNC_ATTRS Z_IFUNC_NO_SANCOV ++#else /* __s390__ */ ++#define Z_IFUNC_PARAMS void ++#define Z_IFUNC_ATTRS ++#endif /* __s390__ */ ++ + #define Z_IFUNC(fname) \ + typeof(fname) fname __attribute__ ((ifunc (#fname "_resolver"))); \ +- local typeof(fname) *fname##_resolver(void) ++ Z_IFUNC_ATTRS \ ++ local typeof(fname) *fname##_resolver(Z_IFUNC_PARAMS) + /* This is a helper macro to declare a resolver for an indirect function + * (ifunc). 
Let's say you have function + * +--- /dev/null ++++ b/contrib/s390/crc32-vx.c +@@ -0,0 +1,195 @@ ++/* ++ * Hardware-accelerated CRC-32 variants for Linux on z Systems ++ * ++ * Use the z/Architecture Vector Extension Facility to accelerate the ++ * computing of bitreflected CRC-32 checksums. ++ * ++ * This CRC-32 implementation algorithm is bitreflected and processes ++ * the least-significant bit first (Little-Endian). ++ * ++ * This code was originally written by Hendrik Brueckner ++ * for use in the Linux kernel and has been ++ * relicensed under the zlib license. ++ */ ++ ++#include "../../zutil.h" ++ ++#include <stdint.h> ++#include <vecintrin.h> ++ ++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++ ++uint32_t crc32_le_vgfm_16(uint32_t crc, const unsigned char *buf, size_t len) { ++ /* ++ * The CRC-32 constant block contains reduction constants to fold and ++ * process particular chunks of the input data stream in parallel. ++ * ++ * For the CRC-32 variants, the constants are precomputed according to ++ * these definitions: ++ * ++ * R1 = [(x4*128+32 mod P'(x) << 32)]' << 1 ++ * R2 = [(x4*128-32 mod P'(x) << 32)]' << 1 ++ * R3 = [(x128+32 mod P'(x) << 32)]' << 1 ++ * R4 = [(x128-32 mod P'(x) << 32)]' << 1 ++ * R5 = [(x64 mod P'(x) << 32)]' << 1 ++ * R6 = [(x32 mod P'(x) << 32)]' << 1 ++ * ++ * The bitreflected Barret reduction constant, u', is defined as ++ * the bit reversal of floor(x**64 / P(x)). ++ * ++ * where P(x) is the polynomial in the normal domain and the P'(x) is the ++ * polynomial in the reversed (bitreflected) domain. ++ * ++ * CRC-32 (IEEE 802.3 Ethernet, ...) 
polynomials: ++ * ++ * P(x) = 0x04C11DB7 ++ * P'(x) = 0xEDB88320 ++ */ ++ const uv16qi perm_le2be = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; /* BE->LE mask */ ++ const uv2di r2r1 = {0x1C6E41596, 0x154442BD4}; /* R2, R1 */ ++ const uv2di r4r3 = {0x0CCAA009E, 0x1751997D0}; /* R4, R3 */ ++ const uv2di r5 = {0, 0x163CD6124}; /* R5 */ ++ const uv2di ru_poly = {0, 0x1F7011641}; /* u' */ ++ const uv2di crc_poly = {0, 0x1DB710641}; /* P'(x) << 1 */ ++ ++ /* ++ * Load the initial CRC value. ++ * ++ * The CRC value is loaded into the rightmost word of the ++ * vector register and is later XORed with the LSB portion ++ * of the loaded input data. ++ */ ++ uv2di v0 = {0, 0}; ++ v0 = (uv2di)vec_insert(crc, (uv4si)v0, 3); ++ ++ /* Load a 64-byte data chunk and XOR with CRC */ ++ uv2di v1 = vec_perm(((uv2di *)buf)[0], ((uv2di *)buf)[0], perm_le2be); ++ uv2di v2 = vec_perm(((uv2di *)buf)[1], ((uv2di *)buf)[1], perm_le2be); ++ uv2di v3 = vec_perm(((uv2di *)buf)[2], ((uv2di *)buf)[2], perm_le2be); ++ uv2di v4 = vec_perm(((uv2di *)buf)[3], ((uv2di *)buf)[3], perm_le2be); ++ ++ v1 ^= v0; ++ buf += 64; ++ len -= 64; ++ ++ while (len >= 64) { ++ /* Load the next 64-byte data chunk */ ++ uv16qi part1 = vec_perm(((uv16qi *)buf)[0], ((uv16qi *)buf)[0], perm_le2be); ++ uv16qi part2 = vec_perm(((uv16qi *)buf)[1], ((uv16qi *)buf)[1], perm_le2be); ++ uv16qi part3 = vec_perm(((uv16qi *)buf)[2], ((uv16qi *)buf)[2], perm_le2be); ++ uv16qi part4 = vec_perm(((uv16qi *)buf)[3], ((uv16qi *)buf)[3], perm_le2be); ++ ++ /* ++ * Perform a GF(2) multiplication of the doublewords in V1 with ++ * the R1 and R2 reduction constants in V0. The intermediate result ++ * is then folded (accumulated) with the next data chunk in PART1 and ++ * stored in V1. Repeat this step for the register contents ++ * in V2, V3, and V4 respectively. 
++ */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r2r1, v1, part1); ++ v2 = (uv2di)vec_gfmsum_accum_128(r2r1, v2, part2); ++ v3 = (uv2di)vec_gfmsum_accum_128(r2r1, v3, part3); ++ v4 = (uv2di)vec_gfmsum_accum_128(r2r1, v4, part4); ++ ++ buf += 64; ++ len -= 64; ++ } ++ ++ /* ++ * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3 ++ * and R4 and accumulating the next 128-bit chunk until a single 128-bit ++ * value remains. ++ */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2); ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v3); ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v4); ++ ++ while (len >= 16) { ++ /* Load next data chunk */ ++ v2 = vec_perm(*(uv2di *)buf, *(uv2di *)buf, perm_le2be); ++ ++ /* Fold next data chunk */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2); ++ ++ buf += 16; ++ len -= 16; ++ } ++ ++ /* ++ * Set up a vector register for byte shifts. The shift value must ++ * be loaded in bits 1-4 in byte element 7 of a vector register. ++ * Shift by 8 bytes: 0x40 ++ * Shift by 4 bytes: 0x20 ++ */ ++ uv16qi v9 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; ++ v9 = vec_insert((unsigned char)0x40, v9, 7); ++ ++ /* ++ * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes ++ * to move R4 into the rightmost doubleword and set the leftmost ++ * doubleword to 0x1. ++ */ ++ v0 = vec_srb(r4r3, (uv2di)v9); ++ v0[0] = 1; ++ ++ /* ++ * Compute GF(2) product of V1 and V0. The rightmost doubleword ++ * of V1 is multiplied with R4. The leftmost doubleword of V1 is ++ * multiplied by 0x1 and is then XORed with rightmost product. ++ * Implicitly, the intermediate leftmost product becomes padded ++ */ ++ v1 = (uv2di)vec_gfmsum_128(v0, v1); ++ ++ /* ++ * Now do the final 32-bit fold by multiplying the rightmost word ++ * in V1 with R5 and XOR the result with the remaining bits in V1. 
++ * ++ * To achieve this by a single VGFMAG, right shift V1 by a word ++ * and store the result in V2 which is then accumulated. Use the ++ * vector unpack instruction to load the rightmost half of the ++ * doubleword into the rightmost doubleword element of V1; the other ++ * half is loaded in the leftmost doubleword. ++ * The vector register with CONST_R5 contains the R5 constant in the ++ * rightmost doubleword and the leftmost doubleword is zero to ignore ++ * the leftmost product of V1. ++ */ ++ v9 = vec_insert((unsigned char)0x20, v9, 7); ++ v2 = vec_srb(v1, (uv2di)v9); ++ v1 = vec_unpackl((uv4si)v1); /* Split rightmost doubleword */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r5, v1, (uv16qi)v2); ++ ++ /* ++ * Apply a Barrett reduction to compute the final 32-bit CRC value. ++ * ++ * The input values to the Barrett reduction are the degree-63 polynomial ++ * in V1 (R(x)), degree-32 generator polynomial, and the reduction ++ * constant u. The Barrett reduction result is the CRC value of R(x) mod ++ * P(x). ++ * ++ * The Barrett reduction algorithm is defined as: ++ * ++ * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u ++ * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) ++ * 3. C(x) = R(x) XOR T2(x) mod x^32 ++ * ++ * Note: The leftmost doubleword of vector register containing ++ * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product ++ * is zero and does not contribute to the final result. ++ */ ++ ++ /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ ++ v2 = vec_unpackl((uv4si)v1); ++ v2 = (uv2di)vec_gfmsum_128(ru_poly, v2); ++ ++ /* ++ * Compute the GF(2) product of the CRC polynomial with T1(x) in ++ * V2 and XOR the intermediate result, T2(x), with the value in V1. ++ * The final result is stored in word element 2 of V2.
++ */ ++ v2 = vec_unpackl((uv4si)v2); ++ v2 = (uv2di)vec_gfmsum_accum_128(crc_poly, v2, (uv16qi)v1); ++ ++ return ((uv4si)v2)[2]; ++} +--- /dev/null ++++ b/contrib/s390/crc32_z_resolver.c +@@ -0,0 +1,41 @@ ++#include <sys/auxv.h> ++#include "../gcc/zifunc.h" ++ ++#define VX_MIN_LEN 64 ++#define VX_ALIGNMENT 16L ++#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) ++ ++unsigned int crc32_le_vgfm_16(unsigned int crc, const unsigned char FAR *buf, z_size_t len); ++ ++local unsigned long s390_crc32_vx(unsigned long crc, const unsigned char FAR *buf, z_size_t len) ++{ ++ uintptr_t prealign, aligned, remaining; ++ ++ if (buf == Z_NULL) return 0UL; ++ ++ if (len < VX_MIN_LEN + VX_ALIGN_MASK) ++ return crc32_z_default(crc, buf, len); ++ ++ if ((uintptr_t)buf & VX_ALIGN_MASK) { ++ prealign = VX_ALIGNMENT - ((uintptr_t)buf & VX_ALIGN_MASK); ++ len -= prealign; ++ crc = crc32_z_default(crc, buf, prealign); ++ buf += prealign; ++ } ++ aligned = len & ~VX_ALIGN_MASK; ++ remaining = len & VX_ALIGN_MASK; ++ ++ crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, (size_t)aligned) ^ 0xffffffff; ++ ++ if (remaining) ++ crc = crc32_z_default(crc, buf + aligned, remaining); ++ ++ return crc; ++} ++ ++Z_IFUNC(crc32_z) ++{ ++ if (hwcap & HWCAP_S390_VX) ++ return s390_crc32_vx; ++ return crc32_z_default; ++} +--- a/crc32.c ++++ b/crc32.c +@@ -745,12 +745,12 @@ + #endif + + /* ========================================================================= */ +-#ifdef Z_POWER_OPT ++#if defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) + /* Rename function so resolver can use its symbol. The default version will be + * returned by the resolver if the host has no support for an optimized version.
+ */ + #define crc32_z crc32_z_default +-#endif /* Z_POWER_OPT */ ++#endif /* defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) */ + + unsigned long ZEXPORT crc32_z(crc, buf, len) + unsigned long crc; +@@ -1073,10 +1073,15 @@ + return crc ^ 0xffffffff; + } + +-#ifdef Z_POWER_OPT ++#if defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) + #undef crc32_z ++#ifdef Z_POWER_OPT + #include "contrib/power/crc32_z_resolver.c" + #endif /* Z_POWER_OPT */ ++#ifdef HAVE_S390X_VX ++#include "contrib/s390/crc32_z_resolver.c" ++#endif /* HAVE_S390X_VX */ ++#endif /* defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) */ + + #endif + diff -Nru zlib-1.2.13.dfsg/debian/patches/series zlib-1.2.13.dfsg/debian/patches/series --- zlib-1.2.13.dfsg/debian/patches/series 2022-11-08 00:57:28.000000000 +0100 +++ zlib-1.2.13.dfsg/debian/patches/series 2022-11-21 20:28:58.000000000 +0100 @@ -3,3 +3,4 @@ 410.patch 478.patch #lp1932010-ibm-z-add-vectorized-crc32-implementation.patch +s390x-vectorize-crc32.patch