diff '--exclude=.git' -Npaur ubuntu/zfs-linux/debian/changelog zfs-linux/debian/changelog
--- ubuntu/zfs-linux/debian/changelog	2020-05-16 17:46:19.015566898 -0400
+++ zfs-linux/debian/changelog	2020-05-02 16:21:21.000000000 -0400
@@ -1,13 +1,8 @@
-zfs-linux (0.8.3-1ubuntu13) groovy; urgency=medium
+zfs-linux (0.8.4-1ubuntu1) UNRELEASED; urgency=medium
 
-  * Backport AES-GCM performance accelleration
-    - backport of upstream zfs commit 31b160f0a6c673c8f926233af2ed6d5354808393
-      ("ICP: Improve AES-GCM performance").
-      tests on a memory backed pool show performance improvements of ~15-22%
-      for AES-CCM writes, ~17-20% AES-CCM reads, 34-36% AES-GCM writes and
-      ~79-80% AES-GCM reads.
+  * Initial 0.8.4 test
 
- -- Colin Ian King  Tue, 5 May 2020 15:53:12 +0100
+ -- Satadru Pramanik  Sat, 02 May 2020 16:21:01 -0400
 
 zfs-linux (0.8.3-1ubuntu12) focal; urgency=medium
 
diff '--exclude=.git' -Npaur ubuntu/zfs-linux/debian/patches/2100-zfs-load-module.patch zfs-linux/debian/patches/2100-zfs-load-module.patch
--- ubuntu/zfs-linux/debian/patches/2100-zfs-load-module.patch	2020-05-16 17:46:48.143546632 -0400
+++ zfs-linux/debian/patches/2100-zfs-load-module.patch	1969-12-31 19:00:00.000000000 -0500
@@ -1,79 +0,0 @@
-Index: zfs-linux-0.8.2/etc/systemd/system/Makefile.am
-===================================================================
---- zfs-linux-0.8.2.orig/etc/systemd/system/Makefile.am
-+++ zfs-linux-0.8.2/etc/systemd/system/Makefile.am
-@@ -2,6 +2,7 @@ systemdpreset_DATA = \
- 	50-zfs.preset
-
- systemdunit_DATA = \
-+	zfs-load-module.service \
- 	zfs-zed.service \
- 	zfs-import-cache.service \
- 	zfs-import-scan.service \
-@@ -13,6 +14,7 @@ systemdunit_DATA = \
- 	zfs.target
-
- EXTRA_DIST = \
-+	$(top_srcdir)/etc/systemd/system/zfs-load-module.service.in \
- 	$(top_srcdir)/etc/systemd/system/zfs-zed.service.in \
- 	$(top_srcdir)/etc/systemd/system/zfs-import-cache.service.in \
- 	$(top_srcdir)/etc/systemd/system/zfs-import-scan.service.in \
-Index: zfs-linux-0.8.2/etc/systemd/system/zfs-import-cache.service.in
-===================================================================
---- zfs-linux-0.8.2.orig/etc/systemd/system/zfs-import-cache.service.in
-+++ zfs-linux-0.8.2/etc/systemd/system/zfs-import-cache.service.in
-@@ -3,7 +3,9 @@ Description=Import ZFS pools by cache fi
- Documentation=man:zpool(8)
- DefaultDependencies=no
- Requires=systemd-udev-settle.service
-+Requires=zfs-load-module.service
- After=systemd-udev-settle.service
-+After=zfs-load-module.service
- After=cryptsetup.target
- After=systemd-remount-fs.service
- Before=zfs-import.target
-Index: zfs-linux-0.8.2/etc/systemd/system/zfs-import-scan.service.in
-===================================================================
---- zfs-linux-0.8.2.orig/etc/systemd/system/zfs-import-scan.service.in
-+++ zfs-linux-0.8.2/etc/systemd/system/zfs-import-scan.service.in
-@@ -3,7 +3,9 @@ Description=Import ZFS pools by device s
- Documentation=man:zpool(8)
- DefaultDependencies=no
- Requires=systemd-udev-settle.service
-+Requires=zfs-load-module.service
- After=systemd-udev-settle.service
-+Requires=zfs-load-module.service
- After=cryptsetup.target
- Before=zfs-import.target
- ConditionPathExists=!@sysconfdir@/zfs/zpool.cache
-Index: zfs-linux-0.8.2/etc/systemd/system/zfs-load-module.service.in
-===================================================================
---- /dev/null
-+++ zfs-linux-0.8.2/etc/systemd/system/zfs-load-module.service.in
-@@ -0,0 +1,17 @@
-+[Unit]
-+Description=Install ZFS kernel module
-+DefaultDependencies=no
-+Requires=systemd-udev-settle.service
-+After=systemd-udev-settle.service
-+After=cryptsetup.target
-+Before=dracut-mount.service
-+After=systemd-remount-fs.service
-+
-+[Service]
-+Type=oneshot
-+RemainAfterExit=yes
-+ExecStart=/sbin/modprobe zfs
-+
-+[Install]
-+WantedBy=zfs-mount.service
-+WantedBy=zfs.target
-Index: zfs-linux-0.8.2/etc/systemd/system/50-zfs.preset.in
-===================================================================
---- zfs-linux-0.8.2.orig/etc/systemd/system/50-zfs.preset.in
-+++ zfs-linux-0.8.2/etc/systemd/system/50-zfs.preset.in
-@@ -7,3 +7,4 @@ enable zfs-share.service
- enable zfs-zed.service
- enable zfs-volume-wait.service
- enable zfs.target
-+enable zfs-load-module.service
diff '--exclude=.git' -Npaur ubuntu/zfs-linux/debian/patches/2101-zfs-load-module.patch zfs-linux/debian/patches/2101-zfs-load-module.patch
--- ubuntu/zfs-linux/debian/patches/2101-zfs-load-module.patch	1969-12-31 19:00:00.000000000 -0500
+++ zfs-linux/debian/patches/2101-zfs-load-module.patch	2020-05-13 13:20:56.000000000 -0400
@@ -0,0 +1,79 @@
+Index: zfs-linux-0.8.2/etc/systemd/system/Makefile.am
+===================================================================
+--- zfs-linux-0.8.2.orig/etc/systemd/system/Makefile.am
++++ zfs-linux-0.8.2/etc/systemd/system/Makefile.am
+@@ -2,6 +2,7 @@ systemdpreset_DATA = \
+ 	50-zfs.preset
+
+ systemdunit_DATA = \
++	zfs-load-module.service \
+ 	zfs-zed.service \
+ 	zfs-import-cache.service \
+ 	zfs-import-scan.service \
+@@ -13,6 +14,7 @@ systemdunit_DATA = \
+ 	zfs.target
+
+ EXTRA_DIST = \
++	$(top_srcdir)/etc/systemd/system/zfs-load-module.service.in \
+ 	$(top_srcdir)/etc/systemd/system/zfs-zed.service.in \
+ 	$(top_srcdir)/etc/systemd/system/zfs-import-cache.service.in \
+ 	$(top_srcdir)/etc/systemd/system/zfs-import-scan.service.in \
+Index: zfs-linux-0.8.2/etc/systemd/system/zfs-import-cache.service.in
+===================================================================
+--- zfs-linux-0.8.2.orig/etc/systemd/system/zfs-import-cache.service.in
++++ zfs-linux-0.8.2/etc/systemd/system/zfs-import-cache.service.in
+@@ -3,7 +3,9 @@ Description=Import ZFS pools by cache fi
+ Documentation=man:zpool(8)
+ DefaultDependencies=no
+ Requires=systemd-udev-settle.service
++Requires=zfs-load-module.service
+ After=systemd-udev-settle.service
++After=zfs-load-module.service
+ After=cryptsetup.target
+ After=multipathd.target
+ After=systemd-remount-fs.service
+Index: zfs-linux-0.8.2/etc/systemd/system/zfs-import-scan.service.in
+===================================================================
+--- zfs-linux-0.8.2.orig/etc/systemd/system/zfs-import-scan.service.in
++++ zfs-linux-0.8.2/etc/systemd/system/zfs-import-scan.service.in
+@@ -3,7 +3,9 @@ Description=Import ZFS pools by device s
+ Documentation=man:zpool(8)
+ DefaultDependencies=no
+ Requires=systemd-udev-settle.service
++Requires=zfs-load-module.service
+ After=systemd-udev-settle.service
++After=zfs-load-module.service
+ After=cryptsetup.target
+ After=multipathd.target
+ Before=zfs-import.target
+Index: zfs-linux-0.8.2/etc/systemd/system/zfs-load-module.service.in
+===================================================================
+--- /dev/null
++++ zfs-linux-0.8.2/etc/systemd/system/zfs-load-module.service.in
+@@ -0,0 +1,17 @@
++[Unit]
++Description=Install ZFS kernel module
++DefaultDependencies=no
++Requires=systemd-udev-settle.service
++After=systemd-udev-settle.service
++After=cryptsetup.target
++Before=dracut-mount.service
++After=systemd-remount-fs.service
++
++[Service]
++Type=oneshot
++RemainAfterExit=yes
++ExecStart=/sbin/modprobe zfs
++
++[Install]
++WantedBy=zfs-mount.service
++WantedBy=zfs.target
+Index: zfs-linux-0.8.2/etc/systemd/system/50-zfs.preset.in
+===================================================================
+--- zfs-linux-0.8.2.orig/etc/systemd/system/50-zfs.preset.in
++++ zfs-linux-0.8.2/etc/systemd/system/50-zfs.preset.in
+@@ -7,3 +7,4 @@ enable zfs-share.service
+ enable zfs-zed.service
+ enable zfs-volume-wait.service
+ enable zfs.target
++enable zfs-load-module.service
diff '--exclude=.git' -Npaur ubuntu/zfs-linux/debian/patches/4000-zsys-support.patch zfs-linux/debian/patches/4000-zsys-support.patch
--- ubuntu/zfs-linux/debian/patches/4000-zsys-support.patch	2020-05-16 17:46:48.147546638 -0400
+++ zfs-linux/debian/patches/4000-zsys-support.patch	1969-12-31 19:00:00.000000000 -0500
@@ -1,257 +0,0 @@
-Description: Support zsys systems
- Zsys is an enhanced and structured dataset layout for ZFS.
- .
- It enables advanced use cases by differentiating system,
- user data and persistent partitions to allow only partial
- permanent or temporary rollback without destroying intermediate
- snapshots.
-Author: Jean-Baptiste Lallement
- Didier Roche
-Last-Update: 2019-06-06
-Index: zfs-linux-0.8.3/contrib/initramfs/scripts/zfs.in
-===================================================================
---- zfs-linux-0.8.3.orig/contrib/initramfs/scripts/zfs.in
-+++ zfs-linux-0.8.3/contrib/initramfs/scripts/zfs.in
-@@ -71,6 +71,20 @@ get_fs_value()
- 	"${ZFS}" get -H -ovalue $value "$fs" 2> /dev/null
- }
-
-+# Get a ZFS filesystem property value with the source stripped from the value
-+get_fs_value_without_source()
-+{
-+	value="$(get_fs_value $@)"
-+	echo "${value%%:*}"
-+}
-+
-+# Get a ZFS filesystem property source for a given key
-+get_fs_source()
-+{
-+	value="$(get_fs_value $@)"
-+	echo "${value#*:}"
-+}
-+
- # Find the 'bootfs' property on pool $1.
- # If the property does not contain '/', then ignore this
- # pool by exporting it again.
-@@ -495,16 +509,17 @@ clone_snap()
- 	local snap="$1"
- 	local destfs="$2"
- 	local mountpoint="$3"
-+	local additional_parameters="$4"
-
- 	[ "$quiet" != "y" ] && zfs_log_begin_msg "Cloning '$snap' to '$destfs'"
-
-+	if [ -n "${mountpoint}" ]; then
-+		additional_parameters="${additional_parameters} -o mountpoint=${mountpoint}"
-+	fi
-+
- 	# Clone the snapshot into a dataset we can boot from
--	# + We don't want this filesystem to be automatically mounted, we
--	#   want control over this here and nowhere else.
--	# + We don't need any mountpoint set for the same reason.
--	#   We use the 'org.zol:mountpoint' property to remember the mountpoint.
--	ZFS_CMD="${ZFS} clone -o canmount=noauto -o mountpoint=none"
--	ZFS_CMD="${ZFS_CMD} -o org.zol:mountpoint=${mountpoint}"
-+	ZFS_CMD="${ZFS} clone"
-+	ZFS_CMD="${ZFS_CMD} -o canmount=noauto ${additional_parameters}"
-+	ZFS_CMD="${ZFS_CMD} $snap $destfs"
- 	ZFS_STDERR="$(${ZFS_CMD} 2>&1)"
- 	ZFS_ERROR="$?"
-@@ -616,6 +631,15 @@ setup_snapshot_booting()
- 	snapname="${snap##*@}"
- 	ZFS_BOOTFS="${rootfs}_${snapname}"
-
-+	# Detect if we are on a zsys system, which will generates an unique UUID
-+	# and override ZFS_BOOTFS
-+	use_zsys=$(get_fs_value_without_source "${rootfs}" com.ubuntu.zsys:bootfs)
-+	if [ "$use_zsys" = "yes" ]; then
-+		zsys_uid=`uid`
-+		ZFS_BOOTFS="${rootfs%_*}_${zsys_uid}"	# we strip old uid and add new one
-+	fi
-+
-+	# Rollback won't have effect on zsys system
- 	if ! grep -qiE '(^|[^\\](\\\\)* )(rollback)=(on|yes|1)( |$)' /proc/cmdline
- 	then
- 		# If the destination dataset for the clone
-@@ -645,10 +669,18 @@ setup_snapshot_booting()
- 			#   rpool/ROOT/debian/boot@snap2 => rpool/ROOT/debian_snap2/boot
- 			#   rpool/ROOT/debian/usr@snap2  => rpool/ROOT/debian_snap2/usr
- 			#   rpool/ROOT/debian/var@snap2  => rpool/ROOT/debian_snap2/var
-+			#
-+			# For zsys, we have stable root dataset names with uid, so:
-+			#   rpool/ROOT/debian_uid1@snap2      => rpool/ROOT/debian_uid2
-+			#   rpool/ROOT/debian_uid1/boot@snap2 => rpool/ROOT/debian_uid2/boot
-+
- 			subfs="${s##$rootfs}"
- 			subfs="${subfs%%@$snapname}"
-
- 			destfs="${rootfs}_${snapname}" # base fs.
-+			if [ "${use_zsys}" = "yes" ]; then
-+				destfs="${rootfs%_*}_${zsys_uid}" # we strip old uid and add new one
-+			fi
- 			[ -n "$subfs" ] && destfs="${destfs}$subfs" # + sub fs.
-
- 			# Get the mountpoint of the filesystem, to be used
-@@ -665,9 +697,38 @@ setup_snapshot_booting()
- 			fi
- 		fi
-
-+		# On non zsys:
-+		# + We don't want this filesystem to be automatically mounted, we
-+		#   want control over this here and nowhere else.
-+		# + We don't need any mountpoint set for the same reason.
-+		# + We use the 'org.zol:mountpoint' property to remember the mountpoint.
-+		# On zsys:
-+		# + We don't want this filesystem to be automatically mounted, when cloned
-+		#   so, we set canmount=noauto. Zsys early boot will set the current datasets
-+		#   to on, alongside other system datasets switch. This enables
-+		#   zpool import -a -R /altroot to mount the whole system.
-+		#   The initrd script is doing zpool import -N, so we are not impacted by setting
-+		#   canmount=on on secondary boot.
-+		# + We thus need the real mountpoint set for this reason (as we can't set it
-+		#   once the system booted, even if the mountpoint didn't change)
-+		# + We set additional parameters to zsys to mark datasets we want mount manually
-+		#   at boot.
-+		if [ "${use_zsys}" != "yes" ]; then
-+			clone_additional_parameters="-o org.zol:mountpoint=${mountpoint}"
-+			mountpoint=none
-+		else
-+			[ "$(get_fs_value_without_source "$s" com.ubuntu.zsys:bootfs)" != "yes" ] && continue
-+			clone_additional_parameters="-o com.ubuntu.zsys:bootfs=yes"
-+			# Only set mountpoint explicitely if it was locally set
-+			# Keep the possibility to have mountpoint inherited for manual zfs snapshots without zsys involved, which
-+			# will have an empty user propertie
-+			local mountpoint_source="$(get_fs_source "$s" com.ubuntu.zsys:mountpoint)"
-+			[ -n "${mountpoint_source}" -a "${mountpoint_source}" != "local" ] && mountpoint=""
-+		fi
-+
- 			# Clone the snapshot into its own
- 			# filesystem
--			clone_snap "$s" "${destfs}" "${mountpoint}" || \
-+			clone_snap "$s" "${destfs}" "${mountpoint}" "${clone_additional_parameters}" || \
- 			    retval=$((retval + 1))
- 		fi
- 	done
-@@ -930,6 +991,8 @@ mountroot()
- 		# Booting from a snapshot?
- 		# Will overwrite the ZFS_BOOTFS variable like so:
- 		#   rpool/ROOT/debian@snap2 => rpool/ROOT/debian_snap2
-+		# or
-+		#   rpool/ROOT/debian@snap2 => rpool/ROOT/debian_ if selected system is a zsys one
- 		echo "${ZFS_BOOTFS}" | grep -q '@' && \
- 		    setup_snapshot_booting "${ZFS_BOOTFS}"
- 	fi
-@@ -967,8 +1030,16 @@ mountroot()
- 		# Go through the complete list (recursively) of all filesystems below
- 		# the real root dataset
- 		filesystems=$("${ZFS}" list -oname -tfilesystem -H -r "${ZFS_BOOTFS}")
-+
-+		# If the root filesystem is a zsys one, we select the datasets to mount
-+		# at boot.
-+		# Some datasets under ROOT/ can be mounted on top of persistent datasets
-+		# that are hosted elsewhere in the pool. Those are thus only mounted at
-+		# early boot.
-+		use_zsys=$(get_fs_value_without_source "${ZFS_BOOTFS}" com.ubuntu.zsys:bootfs)
- 		for fs in $filesystems $ZFS_INITRD_ADDITIONAL_DATASETS
- 		do
-+			[ "$use_zsys" = "yes" -a "$(get_fs_value_without_source "$fs" com.ubuntu.zsys:bootfs)" != "yes" ] && continue
- 			mount_fs "$fs"
- 		done
-
-@@ -1007,3 +1078,8 @@ mountroot()
- 		[ "$quiet" != "y" ] && zfs_log_end_msg
- 	fi
- }
-+
-+uid()
-+{
-+	dd if=/dev/urandom of=/dev/stdout bs=1 count=100 2>/dev/null | tr -dc 'a-z0-9' | cut -c-6
-+}
-Index: zfs-linux-0.8.3/etc/systemd/system-generators/zfs-mount-generator.in
-===================================================================
---- zfs-linux-0.8.3.orig/etc/systemd/system-generators/zfs-mount-generator.in
-+++ zfs-linux-0.8.3/etc/systemd/system-generators/zfs-mount-generator.in
-@@ -256,6 +256,83 @@ EOF
- 	ln -s "../${mountfile}" "${req_dir}"
- }
-
-+ZPOOL_CACHE="@sysconfdir@/zfs/zpool.cache"
-+PROPS="name,mountpoint,canmount,atime,relatime,devices,exec,readonly"
-+PROPS="${PROPS},setuid,nbmand"
-+zsys_revert_failed=0
-+errfile="/tmp/zsys-revert-out.log"
-+
-+drop_emergency_on_failure() {
-+	if [ ${zsys_revert_failed} -eq 0 ]; then
-+		return
-+	fi
-+
-+	# Drop to emergency target in case of failure after cleanup fstab mountpoints.
-+	# This avoids booting and having a mix of old and new datasets, and creating directory in the wrong
-+	# datasets, like /boot/grub in / which will prevent zfs to mount /boot dataset later on.
-+	rm -f "${dest_norm}"/*.mount
-+	ln -s /lib/systemd/system/emergency.target "${dest_norm}"/default.target
-+
-+	printf 'ERROR: zfs-mount-generator failed and you requested a revert:\n' > /dev/kmsg
-+	cat "${errfile}" > /dev/kmsg
-+	printf 'You can reboot on current master dataset to fix the issue\n' > /dev/kmsg
-+}
-+
-+# Handle revert so that zsys prepares all datasets as expected.
-+initzsys() {
-+	if [ ! -x @sbindir@/zsysd ]; then
-+		return
-+	fi
-+
-+	# Non ZFS system
-+	if ! grep -q "root=ZFS=" /proc/cmdline; then
-+		return
-+	fi
-+
-+	# If we boot on the same dataset than last time, assume we don’t need to do anything as the cache file will only
-+	# import desired pools.
-+	bootds="$(sed -e 's/.*root=ZFS=\([^ ]\+\).*/\1/' /proc/cmdline)"
-+	if grep -Eq "${bootds}\s+/\s+on" "${FSLIST}/"*; then
-+		return
-+	fi
-+
-+	# If we get here: we are reverting. Let zsys handle it
-+	trap drop_emergency_on_failure EXIT INT QUIT ABRT PIPE TERM
-+
-+	exec 3>&1 1>"${errfile}"
-+	exec 4>&2 2>&1
-+
-+	zsys_revert_failed=1
-+	# Import and list previously imported pools for zsys
-+	if [ -f "${ZPOOL_CACHE}" ]; then
-+		@sbindir@/zpool import -c "${ZPOOL_CACHE}" -aN
-+	# As a best effort, import all available pools, hoping there is no conflict.
-+	else
-+		echo "We had to search for all available pools because ${ZPOOL_CACHE} doesn't exist. To avoid this, create a zpool cache file."
-+		@sbindir@/zpool import -aN
-+	fi
-+
-+	@sbindir@/zsysd boot-prepare >"${errfile}"
-+
-+	# If FSLIST is empty, populate with all imported pools
-+	if [ -z "$(ls -A ${FSLIST})" ]; then
-+		@sbindir@/zpool list -H | cut -f1 | xargs -I{} touch ${FSLIST}/{}
-+	fi
-+
-+	# Refresh zfs list cache
-+	for cachefile in "${FSLIST}/"* ; do
-+		pool=`basename ${cachefile}`
-+		@sbindir@/zfs list -H -t filesystem -o "${PROPS}" -r "${pool}" >"${cachefile}"
-+	done
-+
-+	exec 1>&3 3>&-
-+	exec 2>&4 4>&-
-+	zsys_revert_failed=0
-+	rm "${errfile}"
-+}
-+
-+initzsys
-+
- # Feed each line into process_line
- for cachefile in "${FSLIST}/"* ; do
- 	while read -r fs ; do
diff '--exclude=.git' -Npaur ubuntu/zfs-linux/debian/patches/4001-zsys-support.patch zfs-linux/debian/patches/4001-zsys-support.patch
--- ubuntu/zfs-linux/debian/patches/4001-zsys-support.patch	1969-12-31 19:00:00.000000000 -0500
+++ zfs-linux/debian/patches/4001-zsys-support.patch	2020-05-13 23:46:33.980086518 -0400
@@ -0,0 +1,260 @@
+Description: Support zsys systems
+ Zsys is an enhanced and structured dataset layout for ZFS.
+ .
+ It enables advanced use cases by differentiating system,
+ user data and persistent partitions to allow only partial
+ permanent or temporary rollback without destroying intermediate
+ snapshots.
+Author: Jean-Baptiste Lallement
+ Didier Roche
+Last-Update: 2019-06-06
+Index: zfs-linux-0.8.3/contrib/initramfs/scripts/zfs.in
+===================================================================
+--- zfs-linux-0.8.3.orig/contrib/initramfs/scripts/zfs.in
++++ zfs-linux-0.8.3/contrib/initramfs/scripts/zfs.in
+@@ -62,6 +62,19 @@ get_fs_value()
+
+ 	"${ZFS}" get -H -ovalue $value "$fs" 2> /dev/null
+ }
++# Get a ZFS filesystem property value with the source stripped from the value
++get_fs_value_without_source()
++{
++	value="$(get_fs_value $@)"
++	echo "${value%%:*}"
++}
++
++# Get a ZFS filesystem property source for a given key
++get_fs_source()
++{
++	value="$(get_fs_value $@)"
++	echo "${value#*:}"
++}
+
+ # Find the 'bootfs' property on pool $1.
+ # If the property does not contain '/', then ignore this
+@@ -487,17 +500,18 @@ clone_snap()
+ 	local snap="$1"
+ 	local destfs="$2"
+ 	local mountpoint="$3"
++	local additional_parameters="$4"
+
+ 	[ "$quiet" != "y" ] && zfs_log_begin_msg "Cloning '$snap' to '$destfs'"
+
++	if [ -n "${mountpoint}" ]; then
++		additional_parameters="${additional_parameters} -o mountpoint=${mountpoint}"
++	fi
++
+ 	# Clone the snapshot into a dataset we can boot from
+-	# + We don't want this filesystem to be automatically mounted, we
+-	#   want control over this here and nowhere else.
+-	# + We don't need any mountpoint set for the same reason.
+-	#   We use the 'org.zol:mountpoint' property to remember the mountpoint.
+-	ZFS_CMD="${ZFS} clone -o canmount=noauto -o mountpoint=none"
+-	ZFS_CMD="${ZFS_CMD} -o org.zol:mountpoint=${mountpoint}"
+-	ZFS_CMD="${ZFS_CMD} $snap $destfs"
++	ZFS_CMD="${ZFS} clone"
++	ZFS_CMD="${ZFS_CMD} -o canmount=noauto ${additional_parameters}"
++	ZFS_CMD="${ZFS_CMD} $snap $destfs"
+ 	ZFS_STDERR="$(${ZFS_CMD} 2>&1)"
+ 	ZFS_ERROR="$?"
+ 	if [ "${ZFS_ERROR}" != 0 ]
+@@ -608,6 +622,15 @@ setup_snapshot_booting()
+ 	snapname="${snap##*@}"
+ 	ZFS_BOOTFS="${rootfs}_${snapname}"
+
++	# Detect if we are on a zsys system, which will generate a unique UUID
++	# and override ZFS_BOOTFS
++	use_zsys=$(get_fs_value_without_source "${rootfs}" com.ubuntu.zsys:bootfs)
++	if [ "$use_zsys" = "yes" ]; then
++		zsys_uid=`uid`
++		ZFS_BOOTFS="${rootfs%_*}_${zsys_uid}"	# we strip old uid and add new one
++	fi
++
++	# Rollback won't have any effect on a zsys system
+ 	if ! grep -qiE '(^|[^\\](\\\\)* )(rollback)=(on|yes|1)( |$)' /proc/cmdline
+ 	then
+ 		# If the destination dataset for the clone
+@@ -637,10 +660,19 @@ setup_snapshot_booting()
+ 			#   rpool/ROOT/debian/boot@snap2 => rpool/ROOT/debian_snap2/boot
+ 			#   rpool/ROOT/debian/usr@snap2  => rpool/ROOT/debian_snap2/usr
+ 			#   rpool/ROOT/debian/var@snap2  => rpool/ROOT/debian_snap2/var
++			#
++			# For zsys, we have stable root dataset names with uid, so:
++			#   rpool/ROOT/debian_uid1@snap2      => rpool/ROOT/debian_uid2
++			#   rpool/ROOT/debian_uid1/boot@snap2 => rpool/ROOT/debian_uid2/boot
++
+ 			subfs="${s##$rootfs}"
+ 			subfs="${subfs%%@$snapname}"
+
+ 			destfs="${rootfs}_${snapname}" # base fs.
++			if [ "${use_zsys}" = "yes" ]; then
++				destfs="${rootfs%_*}_${zsys_uid}" # we strip old uid and add new one
++			fi
++
+ 			[ -n "$subfs" ] && destfs="${destfs}$subfs" # + sub fs.
+
+ 			# Get the mountpoint of the filesystem, to be used
+@@ -656,10 +688,39 @@ setup_snapshot_booting()
+ 				mountpoint="/"
+ 			fi
+ 		fi
++		# On non zsys:
++		# + We don't want this filesystem to be automatically mounted, we
++		#   want control over this here and nowhere else.
++		# + We don't need any mountpoint set for the same reason.
++		# + We use the 'org.zol:mountpoint' property to remember the mountpoint.
++		# On zsys:
++		# + We don't want this filesystem to be automatically mounted when cloned,
++		#   so we set canmount=noauto. Zsys early boot will set the current datasets
++		#   to on, alongside other system datasets switch. This enables
++		#   zpool import -a -R /altroot to mount the whole system.
++		#   The initrd script is doing zpool import -N, so we are not impacted by setting
++		#   canmount=on on secondary boot.
++		# + We thus need the real mountpoint set for this reason (as we can't set it
++		#   once the system booted, even if the mountpoint didn't change)
++		# + We set additional parameters to zsys to mark datasets we want to mount manually
++		#   at boot.
++		if [ "${use_zsys}" != "yes" ]; then
++			clone_additional_parameters="-o org.zol:mountpoint=${mountpoint}"
++			mountpoint=none
++		else
++			[ "$(get_fs_value_without_source "$s" com.ubuntu.zsys:bootfs)" != "yes" ] && continue
++			clone_additional_parameters="-o com.ubuntu.zsys:bootfs=yes"
++			# Only set mountpoint explicitly if it was locally set
++			# Keep the possibility to have mountpoint inherited for manual zfs snapshots without zsys involved, which
++			# will have an empty user property
++			local mountpoint_source="$(get_fs_source "$s" com.ubuntu.zsys:mountpoint)"
++			[ -n "${mountpoint_source}" -a "${mountpoint_source}" != "local" ] && mountpoint=""
++		fi
++
+
+ 			# Clone the snapshot into its own
+ 			# filesystem
+-			clone_snap "$s" "${destfs}" "${mountpoint}" || \
++			clone_snap "$s" "${destfs}" "${mountpoint}" "${clone_additional_parameters}" || \
+ 			    retval=$((retval + 1))
+ 		fi
+ 	done
+@@ -922,6 +983,9 @@ mountroot()
+ 		# Booting from a snapshot?
+ 		# Will overwrite the ZFS_BOOTFS variable like so:
+ 		#   rpool/ROOT/debian@snap2 => rpool/ROOT/debian_snap2
++		# or
++		#   rpool/ROOT/debian@snap2 => rpool/ROOT/debian_ if selected system is a zsys one
++
+ 		echo "${ZFS_BOOTFS}" | grep -q '@' && \
+ 		    setup_snapshot_booting "${ZFS_BOOTFS}"
+ 	fi
+@@ -959,8 +1023,16 @@ mountroot()
+ 		# Go through the complete list (recursively) of all filesystems below
+ 		# the real root dataset
+ 		filesystems=$("${ZFS}" list -oname -tfilesystem -H -r "${ZFS_BOOTFS}")
++
++		# If the root filesystem is a zsys one, we select the datasets to mount
++		# at boot.
++		# Some datasets under ROOT/ can be mounted on top of persistent datasets
++		# that are hosted elsewhere in the pool. Those are thus only mounted at
++		# early boot.
++		use_zsys=$(get_fs_value_without_source "${ZFS_BOOTFS}" com.ubuntu.zsys:bootfs)
+ 		for fs in $filesystems $ZFS_INITRD_ADDITIONAL_DATASETS
+ 		do
++			[ "$use_zsys" = "yes" -a "$(get_fs_value_without_source "$fs" com.ubuntu.zsys:bootfs)" != "yes" ] && continue
+ 			mount_fs "$fs"
+ 		done
+
+@@ -999,3 +1071,8 @@ mountroot()
+ 		[ "$quiet" != "y" ] && zfs_log_end_msg
+ 	fi
+ }
++
++uid()
++{
++	dd if=/dev/urandom of=/dev/stdout bs=1 count=100 2>/dev/null | tr -dc 'a-z0-9' | cut -c-6
++}
+Index: zfs-linux-0.8.3/etc/systemd/system-generators/zfs-mount-generator.in
+===================================================================
+--- zfs-linux-0.8.3.orig/etc/systemd/system-generators/zfs-mount-generator.in
++++ zfs-linux-0.8.3/etc/systemd/system-generators/zfs-mount-generator.in
+@@ -428,6 +428,82 @@ Options=defaults${opts},zfsutil" > "${de
+ 	create_dependencies "${mountfile}" "requires" "$requiredby"
+
+ }
++ZPOOL_CACHE="@sysconfdir@/zfs/zpool.cache"
++PROPS="name,mountpoint,canmount,atime,relatime,devices,exec,readonly"
++PROPS="${PROPS},setuid,nbmand"
++zsys_revert_failed=0
++errfile="/tmp/zsys-revert-out.log"
++
++drop_emergency_on_failure() {
++	if [ ${zsys_revert_failed} -eq 0 ]; then
++		return
++	fi
++
++	# Drop to emergency target in case of failure after cleanup fstab mountpoints.
++	# This avoids booting and having a mix of old and new datasets, and creating directory in the wrong
++	# datasets, like /boot/grub in / which will prevent zfs to mount /boot dataset later on.
++	rm -f "${dest_norm}"/*.mount
++	ln -s /lib/systemd/system/emergency.target "${dest_norm}"/default.target
++
++	printf 'ERROR: zfs-mount-generator failed and you requested a revert:\n' > /dev/kmsg
++	cat "${errfile}" > /dev/kmsg
++	printf 'You can reboot on current master dataset to fix the issue\n' > /dev/kmsg
++}
++
++# Handle revert so that zsys prepares all datasets as expected.
++initzsys() {
++	if [ ! -x @sbindir@/zsysd ]; then
++		return
++	fi
++
++	# Non ZFS system
++	if ! grep -q "root=ZFS=" /proc/cmdline; then
++		return
++	fi
++
++	# If we boot on the same dataset as last time, assume we don’t need to do anything as the cache file will only
++	# import desired pools.
++	bootds="$(sed -e 's/.*root=ZFS=\([^ ]\+\).*/\1/' /proc/cmdline)"
++	if grep -Eq "${bootds}\s+/\s+on" "${FSLIST}/"*; then
++		return
++	fi
++
++	# If we get here: we are reverting. Let zsys handle it
++	trap drop_emergency_on_failure EXIT INT QUIT ABRT PIPE TERM
++
++	exec 3>&1 1>"${errfile}"
++	exec 4>&2 2>&1
++
++	zsys_revert_failed=1
++	# Import and list previously imported pools for zsys
++	if [ -f "${ZPOOL_CACHE}" ]; then
++		@sbindir@/zpool import -c "${ZPOOL_CACHE}" -aN
++	# As a best effort, import all available pools, hoping there is no conflict.
++	else
++		echo "We had to search for all available pools because ${ZPOOL_CACHE} doesn't exist. To avoid this, create a zpool cache file."
++		@sbindir@/zpool import -aN
++	fi
++
++	@sbindir@/zsysd boot-prepare >"${errfile}"
++
++	# If FSLIST is empty, populate with all imported pools
++	if [ -z "$(ls -A ${FSLIST})" ]; then
++		@sbindir@/zpool list -H | cut -f1 | xargs -I{} touch ${FSLIST}/{}
++	fi
++
++	# Refresh zfs list cache
++	for cachefile in "${FSLIST}/"* ; do
++		pool=`basename ${cachefile}`
++		@sbindir@/zfs list -H -t filesystem -o "${PROPS}" -r "${pool}" >"${cachefile}"
++	done
++
++	exec 1>&3 3>&-
++	exec 2>&4 4>&-
++	zsys_revert_failed=0
++	rm "${errfile}"
++}
++
++initzsys
+
+ for cachefile in "${FSLIST}/"* ; do
+ 	# Sort cachefile's lines by canmount, "on" before "noauto"
diff '--exclude=.git' -Npaur ubuntu/zfs-linux/debian/patches/4610-ICP-Improve-AES-GCM-performance.patch zfs-linux/debian/patches/4610-ICP-Improve-AES-GCM-performance.patch
--- ubuntu/zfs-linux/debian/patches/4610-ICP-Improve-AES-GCM-performance.patch	2020-05-16 17:46:19.015566898 -0400
+++ zfs-linux/debian/patches/4610-ICP-Improve-AES-GCM-performance.patch	1969-12-31 19:00:00.000000000 -0500
@@ -1,3115 +0,0 @@
-From 31b160f0a6c673c8f926233af2ed6d5354808393 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Attila=20F=C3=BCl=C3=B6p?=
-Date: Mon, 10 Feb 2020 21:59:50 +0100
-Subject: [PATCH] ICP: Improve AES-GCM performance
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-Content-Type: text/plain; charset="utf-8"
-Content-Transfer-Encoding: 8bit
-
-Currently SIMD accelerated AES-GCM performance is limited by two
-factors:
-
-a. The need to disable preemption and interrupts and save the FPU
-state before using it and to do the reverse when done. Due to the
-way the code is organized (see (b) below) we have to pay this price
-twice for each 16 byte GCM block processed.
-
-b. Most processing is done in C, operating on single GCM blocks.
-The use of SIMD instructions is limited to the AES encryption of the
-counter block (AES-NI) and the Galois multiplication (PCLMULQDQ).
-This leads to the FPU not being fully utilized for crypto
-operations.
-
-To solve (a) we do crypto processing in larger chunks while owning
-the FPU. An `icp_gcm_avx_chunk_size` module parameter was introduced
-to make this chunk size tweakable. It defaults to 32 KiB. This step
-alone roughly doubles performance. (b) is tackled by porting and
-using the highly optimized openssl AES-GCM assembler routines, which
-do all the processing (CTR, AES, GMULT) in a single routine. Both
-steps together result in up to 32x reduction of the time spend in
-the en/decryption routines, leading up to approximately 12x
-throughput increase for large (128 KiB) blocks.
-
-Lastly, this commit changes the default encryption algorithm from
-AES-CCM to AES-GCM when setting the `encryption=on` property.
- -Reviewed-By: Brian Behlendorf -Reviewed-By: Jason King -Reviewed-By: Tom Caputi -Reviewed-By: Richard Laager -Signed-off-by: Attila Fülöp -Closes #9749 -Signed-off-by: Colin Ian King ---- - COPYRIGHT | 4 + - config/toolchain-simd.m4 | 21 + - include/linux/simd_x86.h | 13 + - include/sys/zio.h | 2 +- - lib/libicp/Makefile.am | 2 + - include/linux/simd.h | 15 +- - man/man8/zfsprops.8 | 2 +- - module/icp/Makefile.in | 9 + - module/icp/algs/modes/gcm.c | 746 ++++++++++++++- - .../modes/THIRDPARTYLICENSE.cryptogams | 36 + - .../THIRDPARTYLICENSE.cryptogams.descrip | 1 + - .../modes/THIRDPARTYLICENSE.openssl | 177 ++++ - .../modes/THIRDPARTYLICENSE.openssl.descrip | 1 + - .../icp/asm-x86_64/modes/aesni-gcm-x86_64.S | 892 ++++++++++++++++++ - module/icp/asm-x86_64/modes/ghash-x86_64.S | 714 ++++++++++++++ - module/icp/include/aes/aes_impl.h | 5 + - module/icp/include/modes/modes.h | 29 +- - .../zfs_create/zfs_create_crypt_combos.ksh | 2 +- - .../zpool_create_crypt_combos.ksh | 2 +- - .../functional/rsend/send_encrypted_props.ksh | 12 +- - 20 files changed, 2654 insertions(+), 31 deletions(-) - create mode 100644 module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams - create mode 100644 module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip - create mode 100644 module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl - create mode 100644 module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip - create mode 100644 module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S - create mode 100644 module/icp/asm-x86_64/modes/ghash-x86_64.S - -Index: zfs-linux-0.8.3/COPYRIGHT -=================================================================== ---- zfs-linux-0.8.3.orig/COPYRIGHT -+++ zfs-linux-0.8.3/COPYRIGHT -@@ -20,6 +20,10 @@ notable exceptions and their respective - * AES Implementation: module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl - * PBKDF2 Implementation: lib/libzfs/THIRDPARTYLICENSE.openssl - * SPL Implementation: module/spl/THIRDPARTYLICENSE.gplv2 -+ * GCM Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams -+ * GCM Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl -+ * GHASH Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams -+ * GHASH Implementaion: module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl - - This product includes software developed by the OpenSSL Project for use - in the OpenSSL Toolkit (http://www.openssl.org/) -Index: zfs-linux-0.8.3/config/toolchain-simd.m4 -=================================================================== ---- zfs-linux-0.8.3.orig/config/toolchain-simd.m4 -+++ zfs-linux-0.8.3/config/toolchain-simd.m4 -@@ -23,6 +23,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN - ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL - ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES - ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ -+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE - ;; - esac - ]) -@@ -400,4 +401,24 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BU - ], [ - AC_MSG_RESULT([no]) - ]) -+]) -+ -+dnl # -+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE -+dnl # -+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE], [ -+ AC_MSG_CHECKING([whether host toolchain supports MOVBE]) -+ -+ AC_LINK_IFELSE([AC_LANG_SOURCE([ -+ [ -+ void main() -+ { -+ __asm__ __volatile__("movbe 0(%eax), %eax"); -+ } -+ ]])], [ -+ AC_MSG_RESULT([yes]) -+ AC_DEFINE([HAVE_MOVBE], 1, [Define if host toolchain supports MOVBE]) -+ ], [ -+ AC_MSG_RESULT([no]) -+ ]) - ]) -Index: zfs-linux-0.8.3/include/linux/simd_x86.h 
-=================================================================== ---- zfs-linux-0.8.3.orig/include/linux/simd_x86.h -+++ zfs-linux-0.8.3/include/linux/simd_x86.h -@@ -382,7 +382,8 @@ typedef enum cpuid_inst_sets { - AVX512ER, - AVX512VL, - AES, -- PCLMULQDQ -+ PCLMULQDQ, -+ MOVBE - } cpuid_inst_sets_t; - - /* -@@ -406,6 +407,7 @@ typedef struct cpuid_feature_desc { - #define _AVX512VL_BIT (1U << 31) /* if used also check other levels */ - #define _AES_BIT (1U << 25) - #define _PCLMULQDQ_BIT (1U << 1) -+#define _MOVBE_BIT (1U << 22) - - /* - * Descriptions of supported instruction sets -@@ -433,6 +435,7 @@ static const cpuid_feature_desc_t cpuid_ - [AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX }, - [AES] = {1U, 0U, _AES_BIT, ECX }, - [PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX }, -+ [MOVBE] = {1U, 0U, _MOVBE_BIT, ECX }, - }; - - /* -@@ -505,6 +508,7 @@ CPUID_FEATURE_CHECK(avx512er, AVX512ER); - CPUID_FEATURE_CHECK(avx512vl, AVX512VL); - CPUID_FEATURE_CHECK(aes, AES); - CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ); -+CPUID_FEATURE_CHECK(movbe, MOVBE); - - #endif /* !defined(_KERNEL) */ - -@@ -719,6 +723,19 @@ zfs_pclmulqdq_available(void) - #endif - } - -+/* -+ * Check if MOVBE instruction is available -+ */ -+static inline boolean_t -+zfs_movbe_available(void) -+{ -+#if defined(X86_FEATURE_MOVBE) -+ return (!!boot_cpu_has(X86_FEATURE_MOVBE)); -+#else -+ return (B_FALSE); -+#endif -+} -+ - /* - * AVX-512 family of instruction sets: - * -Index: zfs-linux-0.8.3/include/sys/zio.h -=================================================================== ---- zfs-linux-0.8.3.orig/include/sys/zio.h -+++ zfs-linux-0.8.3/include/sys/zio.h -@@ -118,7 +118,7 @@ enum zio_encrypt { - ZIO_CRYPT_FUNCTIONS - }; - --#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_CCM -+#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_GCM - #define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF - - /* macros defining encryption lengths */ -Index: zfs-linux-0.8.3/lib/libicp/Makefile.am -=================================================================== ---- zfs-linux-0.8.3.orig/lib/libicp/Makefile.am -+++ zfs-linux-0.8.3/lib/libicp/Makefile.am -@@ -20,6 +20,8 @@ ASM_SOURCES_AS = \ - asm-x86_64/aes/aes_amd64.S \ - asm-x86_64/aes/aes_aesni.S \ - asm-x86_64/modes/gcm_pclmulqdq.S \ -+ asm-x86_64/modes/aesni-gcm-x86_64.S \ -+ asm-x86_64/modes/ghash-x86_64.S \ - asm-x86_64/sha1/sha1-x86_64.S \ - asm-x86_64/sha2/sha256_impl.S \ - asm-x86_64/sha2/sha512_impl.S -Index: zfs-linux-0.8.3/module/icp/Makefile.in -=================================================================== ---- zfs-linux-0.8.3.orig/module/icp/Makefile.in -+++ zfs-linux-0.8.3/module/icp/Makefile.in -@@ -69,9 +69,18 @@ $(MODULE)-objs += algs/skein/skein_iv.o - $(MODULE)-objs += $(ASM_SOURCES) - - $(MODULE)-$(CONFIG_X86) += algs/modes/gcm_pclmulqdq.o -+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/aesni-gcm-x86_64.o -+$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/ghash-x86_64.o - $(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_aesni.o - $(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_x86-64.o - -+# Suppress objtool "can't find jump dest instruction at" warnings. They -+# are caused by the constants which are defined in the text section of the -+# assembly file using .byte instructions (e.g. bswap_mask). The objtool -+# utility tries to interpret them as opcodes and obviously fails doing so. 
-+OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y -+OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y -+ - ICP_DIRS = \ - api \ - core \ -Index: zfs-linux-0.8.3/module/icp/algs/modes/gcm.c -=================================================================== ---- zfs-linux-0.8.3.orig/module/icp/algs/modes/gcm.c -+++ zfs-linux-0.8.3/module/icp/algs/modes/gcm.c -@@ -30,12 +30,46 @@ - #include - #include - #include -+#ifdef CAN_USE_GCM_ASM -+#include -+#endif - - #define GHASH(c, d, t, o) \ - xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \ - (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \ - (uint64_t *)(void *)(t)); - -+/* Select GCM implementation */ -+#define IMPL_FASTEST (UINT32_MAX) -+#define IMPL_CYCLE (UINT32_MAX-1) -+#ifdef CAN_USE_GCM_ASM -+#define IMPL_AVX (UINT32_MAX-2) -+#endif -+#define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i)) -+static uint32_t icp_gcm_impl = IMPL_FASTEST; -+static uint32_t user_sel_impl = IMPL_FASTEST; -+ -+#ifdef CAN_USE_GCM_ASM -+/* -+ * Whether to use the optimized openssl gcm and ghash implementations. -+ * Set to true if module parameter icp_gcm_impl == "avx". -+ */ -+static boolean_t gcm_use_avx = B_FALSE; -+#define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx) -+ -+static inline boolean_t gcm_avx_will_work(void); -+static inline void gcm_set_avx(boolean_t); -+static inline boolean_t gcm_toggle_avx(void); -+ -+static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t, -+ crypto_data_t *, size_t); -+ -+static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); -+static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); -+static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *, -+ size_t, size_t); -+#endif /* ifdef CAN_USE_GCM_ASM */ -+ - /* - * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode - * is done in another function. -@@ -47,6 +81,12 @@ gcm_mode_encrypt_contiguous_blocks(gcm_c - void (*copy_block)(uint8_t *, uint8_t *), - void (*xor_block)(uint8_t *, uint8_t *)) - { -+#ifdef CAN_USE_GCM_ASM -+ if (ctx->gcm_use_avx == B_TRUE) -+ return (gcm_mode_encrypt_contiguous_blocks_avx( -+ ctx, data, length, out, block_size)); -+#endif -+ - const gcm_impl_ops_t *gops; - size_t remainder = length; - size_t need = 0; -@@ -109,6 +149,14 @@ gcm_mode_encrypt_contiguous_blocks(gcm_c - - ctx->gcm_processed_data_len += block_size; - -+ /* -+ * The following copies a complete GCM block back to where it -+ * came from if there was a remainder in the last call and out -+ * is NULL. That doesn't seem to make sense. So we assert this -+ * can't happen and leave the code in for reference. 
-+ * See https://github.com/zfsonlinux/zfs/issues/9661 -+ */ -+ ASSERT(out != NULL); - if (out == NULL) { - if (ctx->gcm_remainder_len > 0) { - bcopy(blockp, ctx->gcm_copy_to, -@@ -169,6 +217,11 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto - void (*copy_block)(uint8_t *, uint8_t *), - void (*xor_block)(uint8_t *, uint8_t *)) - { -+#ifdef CAN_USE_GCM_ASM -+ if (ctx->gcm_use_avx == B_TRUE) -+ return (gcm_encrypt_final_avx(ctx, out, block_size)); -+#endif -+ - const gcm_impl_ops_t *gops; - uint64_t counter_mask = ntohll(0x00000000ffffffffULL); - uint8_t *ghash, *macp = NULL; -@@ -321,6 +374,11 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto - int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), - void (*xor_block)(uint8_t *, uint8_t *)) - { -+#ifdef CAN_USE_GCM_ASM -+ if (ctx->gcm_use_avx == B_TRUE) -+ return (gcm_decrypt_final_avx(ctx, out, block_size)); -+#endif -+ - const gcm_impl_ops_t *gops; - size_t pt_len; - size_t remainder; -@@ -526,6 +584,9 @@ gcm_init(gcm_ctx_t *ctx, unsigned char * - return (CRYPTO_SUCCESS); - } - -+/* -+ * Init the GCM context struct. Handle the cycle and avx implementations here. -+ */ - int - gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, - int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), -@@ -556,11 +617,37 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *p - return (CRYPTO_MECHANISM_PARAM_INVALID); - } - -- if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, -- gcm_param->pAAD, gcm_param->ulAADLen, block_size, -- encrypt_block, copy_block, xor_block) != 0) { -- rv = CRYPTO_MECHANISM_PARAM_INVALID; -+#ifdef CAN_USE_GCM_ASM -+ /* -+ * Handle the "cycle" implementation by creating avx and non avx -+ * contexts alternately. -+ */ -+ if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { -+ gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; -+ } else { -+ gcm_ctx->gcm_use_avx = gcm_toggle_avx(); - } -+ /* We don't handle byte swapped key schedules in the avx code path. */ -+ aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; -+ if (ks->ops->needs_byteswap == B_TRUE) { -+ gcm_ctx->gcm_use_avx = B_FALSE; -+ } -+ /* Avx and non avx context initialization differs from here on. */ -+ if (gcm_ctx->gcm_use_avx == B_FALSE) { -+#endif /* ifdef CAN_USE_GCM_ASM */ -+ if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, -+ gcm_param->pAAD, gcm_param->ulAADLen, block_size, -+ encrypt_block, copy_block, xor_block) != 0) { -+ rv = CRYPTO_MECHANISM_PARAM_INVALID; -+ } -+#ifdef CAN_USE_GCM_ASM -+ } else { -+ if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, -+ gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) { -+ rv = CRYPTO_MECHANISM_PARAM_INVALID; -+ } -+ } -+#endif /* ifdef CAN_USE_GCM_ASM */ - - return (rv); - } -@@ -590,11 +677,37 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char * - return (CRYPTO_MECHANISM_PARAM_INVALID); - } - -- if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, -- gmac_param->pAAD, gmac_param->ulAADLen, block_size, -- encrypt_block, copy_block, xor_block) != 0) { -- rv = CRYPTO_MECHANISM_PARAM_INVALID; -+#ifdef CAN_USE_GCM_ASM -+ /* -+ * Handle the "cycle" implementation by creating avx and non avx -+ * contexts alternately. -+ */ -+ if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { -+ gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; -+ } else { -+ gcm_ctx->gcm_use_avx = gcm_toggle_avx(); -+ } -+ /* We don't handle byte swapped key schedules in the avx code path. 
*/ -+ aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; -+ if (ks->ops->needs_byteswap == B_TRUE) { -+ gcm_ctx->gcm_use_avx = B_FALSE; -+ } -+ /* Avx and non avx context initialization differs from here on. */ -+ if (gcm_ctx->gcm_use_avx == B_FALSE) { -+#endif /* ifdef CAN_USE_GCM_ASM */ -+ if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, -+ gmac_param->pAAD, gmac_param->ulAADLen, block_size, -+ encrypt_block, copy_block, xor_block) != 0) { -+ rv = CRYPTO_MECHANISM_PARAM_INVALID; -+ } -+#ifdef CAN_USE_GCM_ASM -+ } else { -+ if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, -+ gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) { -+ rv = CRYPTO_MECHANISM_PARAM_INVALID; -+ } - } -+#endif /* ifdef CAN_USE_GCM_ASM */ - - return (rv); - } -@@ -645,15 +758,6 @@ const gcm_impl_ops_t *gcm_all_impl[] = { - /* Indicate that benchmark has been completed */ - static boolean_t gcm_impl_initialized = B_FALSE; - --/* Select GCM implementation */ --#define IMPL_FASTEST (UINT32_MAX) --#define IMPL_CYCLE (UINT32_MAX-1) -- --#define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i)) -- --static uint32_t icp_gcm_impl = IMPL_FASTEST; --static uint32_t user_sel_impl = IMPL_FASTEST; -- - /* Hold all supported implementations */ - static size_t gcm_supp_impl_cnt = 0; - static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)]; -@@ -685,6 +789,16 @@ gcm_impl_get_ops() - size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt; - ops = gcm_supp_impl[idx]; - break; -+#ifdef CAN_USE_GCM_ASM -+ case IMPL_AVX: -+ /* -+ * Make sure that we return a valid implementation while -+ * switching to the avx implementation since there still -+ * may be unfinished non-avx contexts around. -+ */ -+ ops = &gcm_generic_impl; -+ break; -+#endif - default: - ASSERT3U(impl, <, gcm_supp_impl_cnt); - ASSERT3U(gcm_supp_impl_cnt, >, 0); -@@ -733,6 +847,16 @@ gcm_impl_init(void) - - strcpy(gcm_fastest_impl.name, "fastest"); - -+#ifdef CAN_USE_GCM_ASM -+ /* -+ * Use the avx implementation if it's available and the implementation -+ * hasn't changed from its default value of fastest on module load. -+ */ -+ if (gcm_avx_will_work() && -+ GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) { -+ gcm_set_avx(B_TRUE); -+ } -+#endif - /* Finish initialization */ - atomic_swap_32(&icp_gcm_impl, user_sel_impl); - gcm_impl_initialized = B_TRUE; -@@ -744,6 +868,9 @@ static const struct { - } gcm_impl_opts[] = { - { "cycle", IMPL_CYCLE }, - { "fastest", IMPL_FASTEST }, -+#ifdef CAN_USE_GCM_ASM -+ { "avx", IMPL_AVX }, -+#endif - }; - - /* -@@ -777,6 +904,12 @@ gcm_impl_set(const char *val) - - /* Check mandatory options */ - for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { -+#ifdef CAN_USE_GCM_ASM -+ /* Ignore avx implementation if it won't work. */ -+ if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { -+ continue; -+ } -+#endif - if (strcmp(req_name, gcm_impl_opts[i].name) == 0) { - impl = gcm_impl_opts[i].sel; - err = 0; -@@ -795,6 +928,18 @@ gcm_impl_set(const char *val) - } - } - } -+#ifdef CAN_USE_GCM_ASM -+ /* -+ * Use the avx implementation if available and the requested one is -+ * avx or fastest. 
-+ */ -+ if (gcm_avx_will_work() == B_TRUE && -+ (impl == IMPL_AVX || impl == IMPL_FASTEST)) { -+ gcm_set_avx(B_TRUE); -+ } else { -+ gcm_set_avx(B_FALSE); -+ } -+#endif - - if (err == 0) { - if (gcm_impl_initialized) -@@ -826,6 +971,12 @@ icp_gcm_impl_get(char *buffer, zfs_kerne - - /* list mandatory options */ - for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { -+#ifdef CAN_USE_GCM_ASM -+ /* Ignore avx implementation if it won't work. */ -+ if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { -+ continue; -+ } -+#endif - fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s "; - cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name); - } -@@ -842,4 +993,563 @@ icp_gcm_impl_get(char *buffer, zfs_kerne - module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get, - NULL, 0644); - MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); --#endif -+#endif /* defined(__KERNEL) */ -+ -+#ifdef CAN_USE_GCM_ASM -+#define GCM_BLOCK_LEN 16 -+/* -+ * The openssl asm routines are 6x aggregated and need that many bytes -+ * at minimum. -+ */ -+#define GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6) -+#define GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3) -+/* -+ * Ensure the chunk size is reasonable since we are allocating a -+ * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts. -+ */ -+#define GCM_AVX_MAX_CHUNK_SIZE \ -+ (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES) -+ -+/* Get the chunk size module parameter. */ -+#define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size -+ -+/* Clear the FPU registers since they hold sensitive internal state. */ -+#define clear_fpu_regs() clear_fpu_regs_avx() -+#define GHASH_AVX(ctx, in, len) \ -+ gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t (*)[2])(ctx)->gcm_Htable, \ -+ in, len) -+ -+#define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1) -+ -+/* -+ * Module parameter: number of bytes to process at once while owning the FPU. -+ * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is -+ * ensured to be greater or equal than GCM_AVX_MIN_DECRYPT_BYTES. -+ */ -+static uint32_t gcm_avx_chunk_size = -+ ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; -+ -+extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); -+extern void clear_fpu_regs_avx(void); -+extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst); -+extern void aes_encrypt_intel(const uint32_t rk[], int nr, -+ const uint32_t pt[4], uint32_t ct[4]); -+ -+extern void gcm_init_htab_avx(uint64_t Htable[16][2], const uint64_t H[2]); -+extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t Htable[16][2], -+ const uint8_t *in, size_t len); -+ -+extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t, -+ const void *, uint64_t *, uint64_t *); -+ -+extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t, -+ const void *, uint64_t *, uint64_t *); -+ -+static inline boolean_t -+gcm_avx_will_work(void) -+{ -+ /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. 
*/ -+ return (kfpu_allowed() && -+ zfs_avx_available() && zfs_movbe_available() && -+ zfs_aes_available() && zfs_pclmulqdq_available()); -+} -+ -+static inline void -+gcm_set_avx(boolean_t val) -+{ -+ if (gcm_avx_will_work() == B_TRUE) { -+ atomic_swap_32(&gcm_use_avx, val); -+ } -+} -+ -+static inline boolean_t -+gcm_toggle_avx(void) -+{ -+ if (gcm_avx_will_work() == B_TRUE) { -+ return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX)); -+ } else { -+ return (B_FALSE); -+ } -+} -+ -+/* -+ * Clear senssitve data in the context. -+ * -+ * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and -+ * ctx->gcm_Htable contain the hash sub key which protects authentication. -+ * -+ * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for -+ * a known plaintext attack, they consists of the IV and the first and last -+ * counter respectively. If they should be cleared is debatable. -+ */ -+static inline void -+gcm_clear_ctx(gcm_ctx_t *ctx) -+{ -+ bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder)); -+ bzero(ctx->gcm_H, sizeof (ctx->gcm_H)); -+ bzero(ctx->gcm_Htable, sizeof (ctx->gcm_Htable)); -+ bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0)); -+ bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp)); -+} -+ -+/* Increment the GCM counter block by n. */ -+static inline void -+gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n) -+{ -+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL); -+ uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask); -+ -+ counter = htonll(counter + n); -+ counter &= counter_mask; -+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; -+} -+ -+/* -+ * Encrypt multiple blocks of data in GCM mode. -+ * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines -+ * if possible. While processing a chunk the FPU is "locked". -+ */ -+static int -+gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, -+ size_t length, crypto_data_t *out, size_t block_size) -+{ -+ size_t bleft = length; -+ size_t need = 0; -+ size_t done = 0; -+ uint8_t *datap = (uint8_t *)data; -+ size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; -+ const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); -+ uint64_t *ghash = ctx->gcm_ghash; -+ uint64_t *cb = ctx->gcm_cb; -+ uint8_t *ct_buf = NULL; -+ uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; -+ int rv = CRYPTO_SUCCESS; -+ -+ ASSERT(block_size == GCM_BLOCK_LEN); -+ /* -+ * If the last call left an incomplete block, try to fill -+ * it first. -+ */ -+ if (ctx->gcm_remainder_len > 0) { -+ need = block_size - ctx->gcm_remainder_len; -+ if (length < need) { -+ /* Accumulate bytes here and return. */ -+ bcopy(datap, (uint8_t *)ctx->gcm_remainder + -+ ctx->gcm_remainder_len, length); -+ -+ ctx->gcm_remainder_len += length; -+ if (ctx->gcm_copy_to == NULL) { -+ ctx->gcm_copy_to = datap; -+ } -+ return (CRYPTO_SUCCESS); -+ } else { -+ /* Complete incomplete block. */ -+ bcopy(datap, (uint8_t *)ctx->gcm_remainder + -+ ctx->gcm_remainder_len, need); -+ -+ ctx->gcm_copy_to = NULL; -+ } -+ } -+ -+ /* Allocate a buffer to encrypt to if there is enough input. */ -+ if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { -+ ct_buf = vmem_alloc(chunk_size, ctx->gcm_kmflag); -+ if (ct_buf == NULL) { -+ return (CRYPTO_HOST_MEMORY); -+ } -+ } -+ -+ /* If we completed an incomplete block, encrypt and write it out. 
*/ -+ if (ctx->gcm_remainder_len > 0) { -+ kfpu_begin(); -+ aes_encrypt_intel(key->encr_ks.ks32, key->nr, -+ (const uint32_t *)cb, (uint32_t *)tmp); -+ -+ gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp); -+ GHASH_AVX(ctx, tmp, block_size); -+ clear_fpu_regs(); -+ kfpu_end(); -+ /* -+ * We don't follow gcm_mode_encrypt_contiguous_blocks() here -+ * but assert that out is not null. -+ * See gcm_mode_encrypt_contiguous_blocks() above and -+ * https://github.com/zfsonlinux/zfs/issues/9661 -+ */ -+ ASSERT(out != NULL); -+ rv = crypto_put_output_data(tmp, out, block_size); -+ out->cd_offset += block_size; -+ gcm_incr_counter_block(ctx); -+ ctx->gcm_processed_data_len += block_size; -+ bleft -= need; -+ datap += need; -+ ctx->gcm_remainder_len = 0; -+ } -+ -+ /* Do the bulk encryption in chunk_size blocks. */ -+ for (; bleft >= chunk_size; bleft -= chunk_size) { -+ kfpu_begin(); -+ done = aesni_gcm_encrypt( -+ datap, ct_buf, chunk_size, key, cb, ghash); -+ -+ clear_fpu_regs(); -+ kfpu_end(); -+ if (done != chunk_size) { -+ rv = CRYPTO_FAILED; -+ goto out_nofpu; -+ } -+ if (out != NULL) { -+ rv = crypto_put_output_data(ct_buf, out, chunk_size); -+ if (rv != CRYPTO_SUCCESS) { -+ goto out_nofpu; -+ } -+ out->cd_offset += chunk_size; -+ } -+ datap += chunk_size; -+ ctx->gcm_processed_data_len += chunk_size; -+ } -+ /* Check if we are already done. */ -+ if (bleft == 0) { -+ goto out_nofpu; -+ } -+ /* Bulk encrypt the remaining data. */ -+ kfpu_begin(); -+ if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { -+ done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash); -+ if (done == 0) { -+ rv = CRYPTO_FAILED; -+ goto out; -+ } -+ if (out != NULL) { -+ rv = crypto_put_output_data(ct_buf, out, done); -+ if (rv != CRYPTO_SUCCESS) { -+ goto out; -+ } -+ out->cd_offset += done; -+ } -+ ctx->gcm_processed_data_len += done; -+ datap += done; -+ bleft -= done; -+ -+ } -+ /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */ -+ while (bleft > 0) { -+ if (bleft < block_size) { -+ bcopy(datap, ctx->gcm_remainder, bleft); -+ ctx->gcm_remainder_len = bleft; -+ ctx->gcm_copy_to = datap; -+ goto out; -+ } -+ /* Encrypt, hash and write out. */ -+ aes_encrypt_intel(key->encr_ks.ks32, key->nr, -+ (const uint32_t *)cb, (uint32_t *)tmp); -+ -+ gcm_xor_avx(datap, tmp); -+ GHASH_AVX(ctx, tmp, block_size); -+ if (out != NULL) { -+ rv = crypto_put_output_data(tmp, out, block_size); -+ if (rv != CRYPTO_SUCCESS) { -+ goto out; -+ } -+ out->cd_offset += block_size; -+ } -+ gcm_incr_counter_block(ctx); -+ ctx->gcm_processed_data_len += block_size; -+ datap += block_size; -+ bleft -= block_size; -+ } -+out: -+ clear_fpu_regs(); -+ kfpu_end(); -+out_nofpu: -+ if (ct_buf != NULL) { -+ vmem_free(ct_buf, chunk_size); -+ } -+ return (rv); -+} -+ -+/* -+ * Finalize the encryption: Zero fill, encrypt, hash and write out an eventual -+ * incomplete last block. Encrypt the ICB. Calculate the tag and write it out. 
-+ */
-+static int
-+gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
-+{
-+	uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
-+	uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
-+	uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
-+	size_t rem_len = ctx->gcm_remainder_len;
-+	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
-+	int aes_rounds = ((aes_key_t *)keysched)->nr;
-+	int rv;
-+
-+	ASSERT(block_size == GCM_BLOCK_LEN);
-+
-+	if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
-+		return (CRYPTO_DATA_LEN_RANGE);
-+	}
-+
-+	kfpu_begin();
-+	/* Pad last incomplete block with zeros, encrypt and hash. */
-+	if (rem_len > 0) {
-+		uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
-+		const uint32_t *cb = (uint32_t *)ctx->gcm_cb;
-+
-+		aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
-+		bzero(remainder + rem_len, block_size - rem_len);
-+		for (int i = 0; i < rem_len; i++) {
-+			remainder[i] ^= tmp[i];
-+		}
-+		GHASH_AVX(ctx, remainder, block_size);
-+		ctx->gcm_processed_data_len += rem_len;
-+		/* No need to increment counter_block, it's the last block. */
-+	}
-+	/* Finish tag. */
-+	ctx->gcm_len_a_len_c[1] =
-+	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
-+	GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
-+	aes_encrypt_intel(keysched, aes_rounds, J0, J0);
-+
-+	gcm_xor_avx((uint8_t *)J0, ghash);
-+	clear_fpu_regs();
-+	kfpu_end();
-+
-+	/* Output remainder. */
-+	if (rem_len > 0) {
-+		rv = crypto_put_output_data(remainder, out, rem_len);
-+		if (rv != CRYPTO_SUCCESS)
-+			return (rv);
-+	}
-+	out->cd_offset += rem_len;
-+	ctx->gcm_remainder_len = 0;
-+	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
-+	if (rv != CRYPTO_SUCCESS)
-+		return (rv);
-+
-+	out->cd_offset += ctx->gcm_tag_len;
-+	/* Clear sensitive data in the context before returning. */
-+	gcm_clear_ctx(ctx);
-+	return (CRYPTO_SUCCESS);
-+}
-+
-+/*
-+ * Finalize decryption: so far we have only accumulated ciphertext;
-+ * now decrypt it in place.
-+ */
-+static int
-+gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
-+{
-+	ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
-+	ASSERT3U(block_size, ==, 16);
-+
-+	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
-+	size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
-+	uint8_t *datap = ctx->gcm_pt_buf;
-+	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
-+	uint32_t *cb = (uint32_t *)ctx->gcm_cb;
-+	uint64_t *ghash = ctx->gcm_ghash;
-+	uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
-+	int rv = CRYPTO_SUCCESS;
-+	size_t bleft, done;
-+
-+	/*
-+	 * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
-+	 * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES and a multiple
-+	 * of GCM_AVX_MIN_DECRYPT_BYTES.
-+	 */
-+	for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
-+		kfpu_begin();
-+		done = aesni_gcm_decrypt(datap, datap, chunk_size,
-+		    (const void *)key, ctx->gcm_cb, ghash);
-+		clear_fpu_regs();
-+		kfpu_end();
-+		if (done != chunk_size) {
-+			return (CRYPTO_FAILED);
-+		}
-+		datap += done;
-+	}
-+	/* Decrypt the remainder, which is less than the chunk size, in one go. */
-+	kfpu_begin();
-+	if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
-+		done = aesni_gcm_decrypt(datap, datap, bleft,
-+		    (const void *)key, ctx->gcm_cb, ghash);
-+		if (done == 0) {
-+			clear_fpu_regs();
-+			kfpu_end();
-+			return (CRYPTO_FAILED);
-+		}
-+		datap += done;
-+		bleft -= done;
-+	}
-+	ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);
-+
-+	/*
-+	 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain;
-+	 * decrypt them block by block.
-+	 */
-+	while (bleft > 0) {
-+		/* Incomplete last block. */
-+		if (bleft < block_size) {
-+			uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;
-+
-+			bzero(lastb, block_size);
-+			bcopy(datap, lastb, bleft);
-+			/* The GCM processing. */
-+			GHASH_AVX(ctx, lastb, block_size);
-+			aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
-+			for (size_t i = 0; i < bleft; i++) {
-+				datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
-+			}
-+			break;
-+		}
-+		/* The GCM processing. */
-+		GHASH_AVX(ctx, datap, block_size);
-+		aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
-+		gcm_xor_avx((uint8_t *)tmp, datap);
-+		gcm_incr_counter_block(ctx);
-+
-+		datap += block_size;
-+		bleft -= block_size;
-+	}
-+	if (rv != CRYPTO_SUCCESS) {
-+		clear_fpu_regs();
-+		kfpu_end();
-+		return (rv);
-+	}
-+	/* Decryption done, finish the tag. */
-+	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
-+	GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
-+	aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
-+	    (uint32_t *)ctx->gcm_J0);
-+
-+	gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);
-+
-+	/* We are done with the FPU, restore its state. */
-+	clear_fpu_regs();
-+	kfpu_end();
-+
-+	/* Compare the input authentication tag with what we calculated. */
-+	if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
-+		/* They don't match. */
-+		return (CRYPTO_INVALID_MAC);
-+	}
-+	rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
-+	if (rv != CRYPTO_SUCCESS) {
-+		return (rv);
-+	}
-+	out->cd_offset += pt_len;
-+	gcm_clear_ctx(ctx);
-+	return (CRYPTO_SUCCESS);
-+}
-+
-+/*
-+ * Initialize the GCM params H, Htable and the counter block. Save the
-+ * initial counter block.
-+ */
-+static int
-+gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
-+    unsigned char *auth_data, size_t auth_data_len, size_t block_size)
-+{
-+	uint8_t *cb = (uint8_t *)ctx->gcm_cb;
-+	uint64_t *H = ctx->gcm_H;
-+	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
-+	int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
-+	uint8_t *datap = auth_data;
-+	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
-+	size_t bleft;
-+
-+	ASSERT(block_size == GCM_BLOCK_LEN);
-+
-+	/* Init H (encrypt zero block) and create the initial counter block. */
-+	bzero(ctx->gcm_ghash, sizeof (ctx->gcm_ghash));
-+	bzero(H, sizeof (ctx->gcm_H));
-+	kfpu_begin();
-+	aes_encrypt_intel(keysched, aes_rounds,
-+	    (const uint32_t *)H, (uint32_t *)H);
-+
-+	gcm_init_htab_avx(ctx->gcm_Htable, H);
-+
-+	if (iv_len == 12) {
-+		bcopy(iv, cb, 12);
-+		cb[12] = 0;
-+		cb[13] = 0;
-+		cb[14] = 0;
-+		cb[15] = 1;
-+		/* We need the ICB later. */
-+		bcopy(cb, ctx->gcm_J0, sizeof (ctx->gcm_J0));
-+	} else {
-+		/*
-+		 * Most consumers use 12-byte IVs, so it's OK to use the
-+		 * original routines for other IV sizes; just avoid nesting
-+		 * kfpu_begin calls.
-+		 */
-+		clear_fpu_regs();
-+		kfpu_end();
-+		gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
-+		    aes_copy_block, aes_xor_block);
-+		kfpu_begin();
-+	}
-+
-+	/* OpenSSL post-increments the counter; adjust for that.
*/ -+ gcm_incr_counter_block(ctx); -+ -+ /* Ghash AAD in chunk_size blocks. */ -+ for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) { -+ GHASH_AVX(ctx, datap, chunk_size); -+ datap += chunk_size; -+ clear_fpu_regs(); -+ kfpu_end(); -+ kfpu_begin(); -+ } -+ /* Ghash the remainder and handle possible incomplete GCM block. */ -+ if (bleft > 0) { -+ size_t incomp = bleft % block_size; -+ -+ bleft -= incomp; -+ if (bleft > 0) { -+ GHASH_AVX(ctx, datap, bleft); -+ datap += bleft; -+ } -+ if (incomp > 0) { -+ /* Zero pad and hash incomplete last block. */ -+ uint8_t *authp = (uint8_t *)ctx->gcm_tmp; -+ -+ bzero(authp, block_size); -+ bcopy(datap, authp, incomp); -+ GHASH_AVX(ctx, authp, block_size); -+ } -+ } -+ clear_fpu_regs(); -+ kfpu_end(); -+ return (CRYPTO_SUCCESS); -+} -+ -+#if defined(_KERNEL) -+static int -+icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp) -+{ -+ unsigned long val; -+ char val_rounded[16]; -+ int error = 0; -+ -+ error = kstrtoul(buf, 0, &val); -+ if (error) -+ return (error); -+ -+ val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; -+ -+ if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE) -+ return (-EINVAL); -+ -+ snprintf(val_rounded, 16, "%u", (uint32_t)val); -+ error = param_set_uint(val_rounded, kp); -+ return (error); -+} -+ -+module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size, -+ param_get_uint, &gcm_avx_chunk_size, 0644); -+ -+MODULE_PARM_DESC(icp_gcm_avx_chunk_size, -+ "How many bytes to process while owning the FPU"); -+ -+#endif /* defined(__KERNEL) */ -+#endif /* ifdef CAN_USE_GCM_ASM */ -Index: zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams -=================================================================== ---- /dev/null -+++ zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams -@@ -0,0 +1,36 @@ -+Copyright (c) 2006-2017, CRYPTOGAMS by -+All rights reserved. -+ -+Redistribution and use in source and binary forms, with or without -+modification, are permitted provided that the following conditions -+are met: -+ -+ * Redistributions of source code must retain copyright notices, -+ this list of conditions and the following disclaimer. -+ -+ * Redistributions in binary form must reproduce the above -+ copyright notice, this list of conditions and the following -+ disclaimer in the documentation and/or other materials -+ provided with the distribution. -+ -+ * Neither the name of the CRYPTOGAMS nor the names of its -+ copyright holder and contributors may be used to endorse or -+ promote products derived from this software without specific -+ prior written permission. -+ -+ALTERNATIVELY, provided that this notice is retained in full, this -+product may be distributed under the terms of the GNU General Public -+License (GPL), in which case the provisions of the GPL apply INSTEAD OF -+those given above. -+ -+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS -+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -Index: zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip -=================================================================== ---- /dev/null -+++ zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip -@@ -0,0 +1 @@ -+PORTIONS OF GCM and GHASH FUNCTIONALITY -Index: zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl -=================================================================== ---- /dev/null -+++ zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl -@@ -0,0 +1,177 @@ -+ -+ Apache License -+ Version 2.0, January 2004 -+ https://www.apache.org/licenses/ -+ -+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -+ -+ 1. Definitions. -+ -+ "License" shall mean the terms and conditions for use, reproduction, -+ and distribution as defined by Sections 1 through 9 of this document. -+ -+ "Licensor" shall mean the copyright owner or entity authorized by -+ the copyright owner that is granting the License. -+ -+ "Legal Entity" shall mean the union of the acting entity and all -+ other entities that control, are controlled by, or are under common -+ control with that entity. For the purposes of this definition, -+ "control" means (i) the power, direct or indirect, to cause the -+ direction or management of such entity, whether by contract or -+ otherwise, or (ii) ownership of fifty percent (50%) or more of the -+ outstanding shares, or (iii) beneficial ownership of such entity. -+ -+ "You" (or "Your") shall mean an individual or Legal Entity -+ exercising permissions granted by this License. -+ -+ "Source" form shall mean the preferred form for making modifications, -+ including but not limited to software source code, documentation -+ source, and configuration files. -+ -+ "Object" form shall mean any form resulting from mechanical -+ transformation or translation of a Source form, including but -+ not limited to compiled object code, generated documentation, -+ and conversions to other media types. -+ -+ "Work" shall mean the work of authorship, whether in Source or -+ Object form, made available under the License, as indicated by a -+ copyright notice that is included in or attached to the work -+ (an example is provided in the Appendix below). -+ -+ "Derivative Works" shall mean any work, whether in Source or Object -+ form, that is based on (or derived from) the Work and for which the -+ editorial revisions, annotations, elaborations, or other modifications -+ represent, as a whole, an original work of authorship. For the purposes -+ of this License, Derivative Works shall not include works that remain -+ separable from, or merely link (or bind by name) to the interfaces of, -+ the Work and Derivative Works thereof. 
-+ -+ "Contribution" shall mean any work of authorship, including -+ the original version of the Work and any modifications or additions -+ to that Work or Derivative Works thereof, that is intentionally -+ submitted to Licensor for inclusion in the Work by the copyright owner -+ or by an individual or Legal Entity authorized to submit on behalf of -+ the copyright owner. For the purposes of this definition, "submitted" -+ means any form of electronic, verbal, or written communication sent -+ to the Licensor or its representatives, including but not limited to -+ communication on electronic mailing lists, source code control systems, -+ and issue tracking systems that are managed by, or on behalf of, the -+ Licensor for the purpose of discussing and improving the Work, but -+ excluding communication that is conspicuously marked or otherwise -+ designated in writing by the copyright owner as "Not a Contribution." -+ -+ "Contributor" shall mean Licensor and any individual or Legal Entity -+ on behalf of whom a Contribution has been received by Licensor and -+ subsequently incorporated within the Work. -+ -+ 2. Grant of Copyright License. Subject to the terms and conditions of -+ this License, each Contributor hereby grants to You a perpetual, -+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable -+ copyright license to reproduce, prepare Derivative Works of, -+ publicly display, publicly perform, sublicense, and distribute the -+ Work and such Derivative Works in Source or Object form. -+ -+ 3. Grant of Patent License. Subject to the terms and conditions of -+ this License, each Contributor hereby grants to You a perpetual, -+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable -+ (except as stated in this section) patent license to make, have made, -+ use, offer to sell, sell, import, and otherwise transfer the Work, -+ where such license applies only to those patent claims licensable -+ by such Contributor that are necessarily infringed by their -+ Contribution(s) alone or by combination of their Contribution(s) -+ with the Work to which such Contribution(s) was submitted. If You -+ institute patent litigation against any entity (including a -+ cross-claim or counterclaim in a lawsuit) alleging that the Work -+ or a Contribution incorporated within the Work constitutes direct -+ or contributory patent infringement, then any patent licenses -+ granted to You under this License for that Work shall terminate -+ as of the date such litigation is filed. -+ -+ 4. Redistribution. 
You may reproduce and distribute copies of the -+ Work or Derivative Works thereof in any medium, with or without -+ modifications, and in Source or Object form, provided that You -+ meet the following conditions: -+ -+ (a) You must give any other recipients of the Work or -+ Derivative Works a copy of this License; and -+ -+ (b) You must cause any modified files to carry prominent notices -+ stating that You changed the files; and -+ -+ (c) You must retain, in the Source form of any Derivative Works -+ that You distribute, all copyright, patent, trademark, and -+ attribution notices from the Source form of the Work, -+ excluding those notices that do not pertain to any part of -+ the Derivative Works; and -+ -+ (d) If the Work includes a "NOTICE" text file as part of its -+ distribution, then any Derivative Works that You distribute must -+ include a readable copy of the attribution notices contained -+ within such NOTICE file, excluding those notices that do not -+ pertain to any part of the Derivative Works, in at least one -+ of the following places: within a NOTICE text file distributed -+ as part of the Derivative Works; within the Source form or -+ documentation, if provided along with the Derivative Works; or, -+ within a display generated by the Derivative Works, if and -+ wherever such third-party notices normally appear. The contents -+ of the NOTICE file are for informational purposes only and -+ do not modify the License. You may add Your own attribution -+ notices within Derivative Works that You distribute, alongside -+ or as an addendum to the NOTICE text from the Work, provided -+ that such additional attribution notices cannot be construed -+ as modifying the License. -+ -+ You may add Your own copyright statement to Your modifications and -+ may provide additional or different license terms and conditions -+ for use, reproduction, or distribution of Your modifications, or -+ for any such Derivative Works as a whole, provided Your use, -+ reproduction, and distribution of the Work otherwise complies with -+ the conditions stated in this License. -+ -+ 5. Submission of Contributions. Unless You explicitly state otherwise, -+ any Contribution intentionally submitted for inclusion in the Work -+ by You to the Licensor shall be under the terms and conditions of -+ this License, without any additional terms or conditions. -+ Notwithstanding the above, nothing herein shall supersede or modify -+ the terms of any separate license agreement you may have executed -+ with Licensor regarding such Contributions. -+ -+ 6. Trademarks. This License does not grant permission to use the trade -+ names, trademarks, service marks, or product names of the Licensor, -+ except as required for reasonable and customary use in describing the -+ origin of the Work and reproducing the content of the NOTICE file. -+ -+ 7. Disclaimer of Warranty. Unless required by applicable law or -+ agreed to in writing, Licensor provides the Work (and each -+ Contributor provides its Contributions) on an "AS IS" BASIS, -+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -+ implied, including, without limitation, any warranties or conditions -+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A -+ PARTICULAR PURPOSE. You are solely responsible for determining the -+ appropriateness of using or redistributing the Work and assume any -+ risks associated with Your exercise of permissions under this License. -+ -+ 8. Limitation of Liability. 
In no event and under no legal theory, -+ whether in tort (including negligence), contract, or otherwise, -+ unless required by applicable law (such as deliberate and grossly -+ negligent acts) or agreed to in writing, shall any Contributor be -+ liable to You for damages, including any direct, indirect, special, -+ incidental, or consequential damages of any character arising as a -+ result of this License or out of the use or inability to use the -+ Work (including but not limited to damages for loss of goodwill, -+ work stoppage, computer failure or malfunction, or any and all -+ other commercial damages or losses), even if such Contributor -+ has been advised of the possibility of such damages. -+ -+ 9. Accepting Warranty or Additional Liability. While redistributing -+ the Work or Derivative Works thereof, You may choose to offer, -+ and charge a fee for, acceptance of support, warranty, indemnity, -+ or other liability obligations and/or rights consistent with this -+ License. However, in accepting such obligations, You may act only -+ on Your own behalf and on Your sole responsibility, not on behalf -+ of any other Contributor, and only if You agree to indemnify, -+ defend, and hold each Contributor harmless for any liability -+ incurred by, or claims asserted against, such Contributor by reason -+ of your accepting any such warranty or additional liability. -+ -+ END OF TERMS AND CONDITIONS -Index: zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip -=================================================================== ---- /dev/null -+++ zfs-linux-0.8.3/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip -@@ -0,0 +1 @@ -+PORTIONS OF GCM and GHASH FUNCTIONALITY -Index: zfs-linux-0.8.3/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S -=================================================================== ---- /dev/null -+++ zfs-linux-0.8.3/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S -@@ -0,0 +1,892 @@ -+# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. -+# -+# Licensed under the Apache License 2.0 (the "License"). You may not use -+# this file except in compliance with the License. You can obtain a copy -+# in the file LICENSE in the source distribution or at -+# https://www.openssl.org/source/license.html -+ -+# -+# ==================================================================== -+# Written by Andy Polyakov for the OpenSSL -+# project. The module is, however, dual licensed under OpenSSL and -+# CRYPTOGAMS licenses depending on where you obtain it. For further -+# details see http://www.openssl.org/~appro/cryptogams/. -+# ==================================================================== -+# -+# -+# AES-NI-CTR+GHASH stitch. -+# -+# February 2013 -+# -+# OpenSSL GCM implementation is organized in such way that its -+# performance is rather close to the sum of its streamed components, -+# in the context parallelized AES-NI CTR and modulo-scheduled -+# PCLMULQDQ-enabled GHASH. Unfortunately, as no stitch implementation -+# was observed to perform significantly better than the sum of the -+# components on contemporary CPUs, the effort was deemed impossible to -+# justify. This module is based on combination of Intel submissions, -+# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max -+# Locktyukhin of Intel Corp. 
who verified that it reduces shuffles -+# pressure with notable relative improvement, achieving 1.0 cycle per -+# byte processed with 128-bit key on Haswell processor, 0.74 - on -+# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled -+# measurements for favourable packet size, one divisible by 96. -+# Applications using the EVP interface will observe a few percent -+# worse performance.] -+# -+# Knights Landing processes 1 byte in 1.25 cycles (measured with EVP). -+# -+# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest -+# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf -+ -+# Generated once from -+# https://github.com/openssl/openssl/blob/5ffc3324/crypto/modes/asm/aesni-gcm-x86_64.pl -+# and modified for ICP. Modification are kept at a bare minimum to ease later -+# upstream merges. -+ -+#if defined(__x86_64__) && defined(HAVE_AVX) && \ -+ defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) && defined(HAVE_MOVBE) -+ -+.text -+ -+.type _aesni_ctr32_ghash_6x,@function -+.align 32 -+_aesni_ctr32_ghash_6x: -+ vmovdqu 32(%r11),%xmm2 -+ subq $6,%rdx -+ vpxor %xmm4,%xmm4,%xmm4 -+ vmovdqu 0-128(%rcx),%xmm15 -+ vpaddb %xmm2,%xmm1,%xmm10 -+ vpaddb %xmm2,%xmm10,%xmm11 -+ vpaddb %xmm2,%xmm11,%xmm12 -+ vpaddb %xmm2,%xmm12,%xmm13 -+ vpaddb %xmm2,%xmm13,%xmm14 -+ vpxor %xmm15,%xmm1,%xmm9 -+ vmovdqu %xmm4,16+8(%rsp) -+ jmp .Loop6x -+ -+.align 32 -+.Loop6x: -+ addl $100663296,%ebx -+ jc .Lhandle_ctr32 -+ vmovdqu 0-32(%r9),%xmm3 -+ vpaddb %xmm2,%xmm14,%xmm1 -+ vpxor %xmm15,%xmm10,%xmm10 -+ vpxor %xmm15,%xmm11,%xmm11 -+ -+.Lresume_ctr32: -+ vmovdqu %xmm1,(%r8) -+ vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5 -+ vpxor %xmm15,%xmm12,%xmm12 -+ vmovups 16-128(%rcx),%xmm2 -+ vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6 -+ xorq %r12,%r12 -+ cmpq %r14,%r15 -+ -+ vaesenc %xmm2,%xmm9,%xmm9 -+ vmovdqu 48+8(%rsp),%xmm0 -+ vpxor %xmm15,%xmm13,%xmm13 -+ vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1 -+ vaesenc %xmm2,%xmm10,%xmm10 -+ vpxor %xmm15,%xmm14,%xmm14 -+ setnc %r12b -+ vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 -+ vaesenc %xmm2,%xmm11,%xmm11 -+ vmovdqu 16-32(%r9),%xmm3 -+ negq %r12 -+ vaesenc %xmm2,%xmm12,%xmm12 -+ vpxor %xmm5,%xmm6,%xmm6 -+ vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5 -+ vpxor %xmm4,%xmm8,%xmm8 -+ vaesenc %xmm2,%xmm13,%xmm13 -+ vpxor %xmm5,%xmm1,%xmm4 -+ andq $0x60,%r12 -+ vmovups 32-128(%rcx),%xmm15 -+ vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1 -+ vaesenc %xmm2,%xmm14,%xmm14 -+ -+ vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2 -+ leaq (%r14,%r12,1),%r14 -+ vaesenc %xmm15,%xmm9,%xmm9 -+ vpxor 16+8(%rsp),%xmm8,%xmm8 -+ vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3 -+ vmovdqu 64+8(%rsp),%xmm0 -+ vaesenc %xmm15,%xmm10,%xmm10 -+ movbeq 88(%r14),%r13 -+ vaesenc %xmm15,%xmm11,%xmm11 -+ movbeq 80(%r14),%r12 -+ vaesenc %xmm15,%xmm12,%xmm12 -+ movq %r13,32+8(%rsp) -+ vaesenc %xmm15,%xmm13,%xmm13 -+ movq %r12,40+8(%rsp) -+ vmovdqu 48-32(%r9),%xmm5 -+ vaesenc %xmm15,%xmm14,%xmm14 -+ -+ vmovups 48-128(%rcx),%xmm15 -+ vpxor %xmm1,%xmm6,%xmm6 -+ vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1 -+ vaesenc %xmm15,%xmm9,%xmm9 -+ vpxor %xmm2,%xmm6,%xmm6 -+ vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2 -+ vaesenc %xmm15,%xmm10,%xmm10 -+ vpxor %xmm3,%xmm7,%xmm7 -+ vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3 -+ vaesenc %xmm15,%xmm11,%xmm11 -+ vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5 -+ vmovdqu 80+8(%rsp),%xmm0 -+ vaesenc %xmm15,%xmm12,%xmm12 -+ vaesenc %xmm15,%xmm13,%xmm13 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vmovdqu 64-32(%r9),%xmm1 -+ vaesenc %xmm15,%xmm14,%xmm14 -+ -+ vmovups 64-128(%rcx),%xmm15 -+ vpxor %xmm2,%xmm6,%xmm6 -+ vpclmulqdq 
$0x00,%xmm1,%xmm0,%xmm2 -+ vaesenc %xmm15,%xmm9,%xmm9 -+ vpxor %xmm3,%xmm6,%xmm6 -+ vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 -+ vaesenc %xmm15,%xmm10,%xmm10 -+ movbeq 72(%r14),%r13 -+ vpxor %xmm5,%xmm7,%xmm7 -+ vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5 -+ vaesenc %xmm15,%xmm11,%xmm11 -+ movbeq 64(%r14),%r12 -+ vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1 -+ vmovdqu 96+8(%rsp),%xmm0 -+ vaesenc %xmm15,%xmm12,%xmm12 -+ movq %r13,48+8(%rsp) -+ vaesenc %xmm15,%xmm13,%xmm13 -+ movq %r12,56+8(%rsp) -+ vpxor %xmm2,%xmm4,%xmm4 -+ vmovdqu 96-32(%r9),%xmm2 -+ vaesenc %xmm15,%xmm14,%xmm14 -+ -+ vmovups 80-128(%rcx),%xmm15 -+ vpxor %xmm3,%xmm6,%xmm6 -+ vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3 -+ vaesenc %xmm15,%xmm9,%xmm9 -+ vpxor %xmm5,%xmm6,%xmm6 -+ vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5 -+ vaesenc %xmm15,%xmm10,%xmm10 -+ movbeq 56(%r14),%r13 -+ vpxor %xmm1,%xmm7,%xmm7 -+ vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1 -+ vpxor 112+8(%rsp),%xmm8,%xmm8 -+ vaesenc %xmm15,%xmm11,%xmm11 -+ movbeq 48(%r14),%r12 -+ vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2 -+ vaesenc %xmm15,%xmm12,%xmm12 -+ movq %r13,64+8(%rsp) -+ vaesenc %xmm15,%xmm13,%xmm13 -+ movq %r12,72+8(%rsp) -+ vpxor %xmm3,%xmm4,%xmm4 -+ vmovdqu 112-32(%r9),%xmm3 -+ vaesenc %xmm15,%xmm14,%xmm14 -+ -+ vmovups 96-128(%rcx),%xmm15 -+ vpxor %xmm5,%xmm6,%xmm6 -+ vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5 -+ vaesenc %xmm15,%xmm9,%xmm9 -+ vpxor %xmm1,%xmm6,%xmm6 -+ vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1 -+ vaesenc %xmm15,%xmm10,%xmm10 -+ movbeq 40(%r14),%r13 -+ vpxor %xmm2,%xmm7,%xmm7 -+ vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2 -+ vaesenc %xmm15,%xmm11,%xmm11 -+ movbeq 32(%r14),%r12 -+ vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8 -+ vaesenc %xmm15,%xmm12,%xmm12 -+ movq %r13,80+8(%rsp) -+ vaesenc %xmm15,%xmm13,%xmm13 -+ movq %r12,88+8(%rsp) -+ vpxor %xmm5,%xmm6,%xmm6 -+ vaesenc %xmm15,%xmm14,%xmm14 -+ vpxor %xmm1,%xmm6,%xmm6 -+ -+ vmovups 112-128(%rcx),%xmm15 -+ vpslldq $8,%xmm6,%xmm5 -+ vpxor %xmm2,%xmm4,%xmm4 -+ vmovdqu 16(%r11),%xmm3 -+ -+ vaesenc %xmm15,%xmm9,%xmm9 -+ vpxor %xmm8,%xmm7,%xmm7 -+ vaesenc %xmm15,%xmm10,%xmm10 -+ vpxor %xmm5,%xmm4,%xmm4 -+ movbeq 24(%r14),%r13 -+ vaesenc %xmm15,%xmm11,%xmm11 -+ movbeq 16(%r14),%r12 -+ vpalignr $8,%xmm4,%xmm4,%xmm0 -+ vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 -+ movq %r13,96+8(%rsp) -+ vaesenc %xmm15,%xmm12,%xmm12 -+ movq %r12,104+8(%rsp) -+ vaesenc %xmm15,%xmm13,%xmm13 -+ vmovups 128-128(%rcx),%xmm1 -+ vaesenc %xmm15,%xmm14,%xmm14 -+ -+ vaesenc %xmm1,%xmm9,%xmm9 -+ vmovups 144-128(%rcx),%xmm15 -+ vaesenc %xmm1,%xmm10,%xmm10 -+ vpsrldq $8,%xmm6,%xmm6 -+ vaesenc %xmm1,%xmm11,%xmm11 -+ vpxor %xmm6,%xmm7,%xmm7 -+ vaesenc %xmm1,%xmm12,%xmm12 -+ vpxor %xmm0,%xmm4,%xmm4 -+ movbeq 8(%r14),%r13 -+ vaesenc %xmm1,%xmm13,%xmm13 -+ movbeq 0(%r14),%r12 -+ vaesenc %xmm1,%xmm14,%xmm14 -+ vmovups 160-128(%rcx),%xmm1 -+ cmpl $12,%ebp // ICP uses 10,12,14 not 9,11,13 for rounds. -+ jb .Lenc_tail -+ -+ vaesenc %xmm15,%xmm9,%xmm9 -+ vaesenc %xmm15,%xmm10,%xmm10 -+ vaesenc %xmm15,%xmm11,%xmm11 -+ vaesenc %xmm15,%xmm12,%xmm12 -+ vaesenc %xmm15,%xmm13,%xmm13 -+ vaesenc %xmm15,%xmm14,%xmm14 -+ -+ vaesenc %xmm1,%xmm9,%xmm9 -+ vaesenc %xmm1,%xmm10,%xmm10 -+ vaesenc %xmm1,%xmm11,%xmm11 -+ vaesenc %xmm1,%xmm12,%xmm12 -+ vaesenc %xmm1,%xmm13,%xmm13 -+ vmovups 176-128(%rcx),%xmm15 -+ vaesenc %xmm1,%xmm14,%xmm14 -+ vmovups 192-128(%rcx),%xmm1 -+ cmpl $14,%ebp // ICP does not zero key schedule. 
-+ jb .Lenc_tail -+ -+ vaesenc %xmm15,%xmm9,%xmm9 -+ vaesenc %xmm15,%xmm10,%xmm10 -+ vaesenc %xmm15,%xmm11,%xmm11 -+ vaesenc %xmm15,%xmm12,%xmm12 -+ vaesenc %xmm15,%xmm13,%xmm13 -+ vaesenc %xmm15,%xmm14,%xmm14 -+ -+ vaesenc %xmm1,%xmm9,%xmm9 -+ vaesenc %xmm1,%xmm10,%xmm10 -+ vaesenc %xmm1,%xmm11,%xmm11 -+ vaesenc %xmm1,%xmm12,%xmm12 -+ vaesenc %xmm1,%xmm13,%xmm13 -+ vmovups 208-128(%rcx),%xmm15 -+ vaesenc %xmm1,%xmm14,%xmm14 -+ vmovups 224-128(%rcx),%xmm1 -+ jmp .Lenc_tail -+ -+.align 32 -+.Lhandle_ctr32: -+ vmovdqu (%r11),%xmm0 -+ vpshufb %xmm0,%xmm1,%xmm6 -+ vmovdqu 48(%r11),%xmm5 -+ vpaddd 64(%r11),%xmm6,%xmm10 -+ vpaddd %xmm5,%xmm6,%xmm11 -+ vmovdqu 0-32(%r9),%xmm3 -+ vpaddd %xmm5,%xmm10,%xmm12 -+ vpshufb %xmm0,%xmm10,%xmm10 -+ vpaddd %xmm5,%xmm11,%xmm13 -+ vpshufb %xmm0,%xmm11,%xmm11 -+ vpxor %xmm15,%xmm10,%xmm10 -+ vpaddd %xmm5,%xmm12,%xmm14 -+ vpshufb %xmm0,%xmm12,%xmm12 -+ vpxor %xmm15,%xmm11,%xmm11 -+ vpaddd %xmm5,%xmm13,%xmm1 -+ vpshufb %xmm0,%xmm13,%xmm13 -+ vpshufb %xmm0,%xmm14,%xmm14 -+ vpshufb %xmm0,%xmm1,%xmm1 -+ jmp .Lresume_ctr32 -+ -+.align 32 -+.Lenc_tail: -+ vaesenc %xmm15,%xmm9,%xmm9 -+ vmovdqu %xmm7,16+8(%rsp) -+ vpalignr $8,%xmm4,%xmm4,%xmm8 -+ vaesenc %xmm15,%xmm10,%xmm10 -+ vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 -+ vpxor 0(%rdi),%xmm1,%xmm2 -+ vaesenc %xmm15,%xmm11,%xmm11 -+ vpxor 16(%rdi),%xmm1,%xmm0 -+ vaesenc %xmm15,%xmm12,%xmm12 -+ vpxor 32(%rdi),%xmm1,%xmm5 -+ vaesenc %xmm15,%xmm13,%xmm13 -+ vpxor 48(%rdi),%xmm1,%xmm6 -+ vaesenc %xmm15,%xmm14,%xmm14 -+ vpxor 64(%rdi),%xmm1,%xmm7 -+ vpxor 80(%rdi),%xmm1,%xmm3 -+ vmovdqu (%r8),%xmm1 -+ -+ vaesenclast %xmm2,%xmm9,%xmm9 -+ vmovdqu 32(%r11),%xmm2 -+ vaesenclast %xmm0,%xmm10,%xmm10 -+ vpaddb %xmm2,%xmm1,%xmm0 -+ movq %r13,112+8(%rsp) -+ leaq 96(%rdi),%rdi -+ vaesenclast %xmm5,%xmm11,%xmm11 -+ vpaddb %xmm2,%xmm0,%xmm5 -+ movq %r12,120+8(%rsp) -+ leaq 96(%rsi),%rsi -+ vmovdqu 0-128(%rcx),%xmm15 -+ vaesenclast %xmm6,%xmm12,%xmm12 -+ vpaddb %xmm2,%xmm5,%xmm6 -+ vaesenclast %xmm7,%xmm13,%xmm13 -+ vpaddb %xmm2,%xmm6,%xmm7 -+ vaesenclast %xmm3,%xmm14,%xmm14 -+ vpaddb %xmm2,%xmm7,%xmm3 -+ -+ addq $0x60,%r10 -+ subq $0x6,%rdx -+ jc .L6x_done -+ -+ vmovups %xmm9,-96(%rsi) -+ vpxor %xmm15,%xmm1,%xmm9 -+ vmovups %xmm10,-80(%rsi) -+ vmovdqa %xmm0,%xmm10 -+ vmovups %xmm11,-64(%rsi) -+ vmovdqa %xmm5,%xmm11 -+ vmovups %xmm12,-48(%rsi) -+ vmovdqa %xmm6,%xmm12 -+ vmovups %xmm13,-32(%rsi) -+ vmovdqa %xmm7,%xmm13 -+ vmovups %xmm14,-16(%rsi) -+ vmovdqa %xmm3,%xmm14 -+ vmovdqu 32+8(%rsp),%xmm7 -+ jmp .Loop6x -+ -+.L6x_done: -+ vpxor 16+8(%rsp),%xmm8,%xmm8 -+ vpxor %xmm4,%xmm8,%xmm8 -+ -+ .byte 0xf3,0xc3 -+.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x -+.globl aesni_gcm_decrypt -+.type aesni_gcm_decrypt,@function -+.align 32 -+aesni_gcm_decrypt: -+.cfi_startproc -+ xorq %r10,%r10 -+ cmpq $0x60,%rdx -+ jb .Lgcm_dec_abort -+ -+ leaq (%rsp),%rax -+.cfi_def_cfa_register %rax -+ pushq %rbx -+.cfi_offset %rbx,-16 -+ pushq %rbp -+.cfi_offset %rbp,-24 -+ pushq %r12 -+.cfi_offset %r12,-32 -+ pushq %r13 -+.cfi_offset %r13,-40 -+ pushq %r14 -+.cfi_offset %r14,-48 -+ pushq %r15 -+.cfi_offset %r15,-56 -+ vzeroupper -+ -+ vmovdqu (%r8),%xmm1 -+ addq $-128,%rsp -+ movl 12(%r8),%ebx -+ leaq .Lbswap_mask(%rip),%r11 -+ leaq -128(%rcx),%r14 -+ movq $0xf80,%r15 -+ vmovdqu (%r9),%xmm8 -+ andq $-128,%rsp -+ vmovdqu (%r11),%xmm0 -+ leaq 128(%rcx),%rcx -+ leaq 32+32(%r9),%r9 -+ movl 504-128(%rcx),%ebp // ICP has a larger offset for rounds. 
-+ vpshufb %xmm0,%xmm8,%xmm8 -+ -+ andq %r15,%r14 -+ andq %rsp,%r15 -+ subq %r14,%r15 -+ jc .Ldec_no_key_aliasing -+ cmpq $768,%r15 -+ jnc .Ldec_no_key_aliasing -+ subq %r15,%rsp -+.Ldec_no_key_aliasing: -+ -+ vmovdqu 80(%rdi),%xmm7 -+ leaq (%rdi),%r14 -+ vmovdqu 64(%rdi),%xmm4 -+ leaq -192(%rdi,%rdx,1),%r15 -+ vmovdqu 48(%rdi),%xmm5 -+ shrq $4,%rdx -+ xorq %r10,%r10 -+ vmovdqu 32(%rdi),%xmm6 -+ vpshufb %xmm0,%xmm7,%xmm7 -+ vmovdqu 16(%rdi),%xmm2 -+ vpshufb %xmm0,%xmm4,%xmm4 -+ vmovdqu (%rdi),%xmm3 -+ vpshufb %xmm0,%xmm5,%xmm5 -+ vmovdqu %xmm4,48(%rsp) -+ vpshufb %xmm0,%xmm6,%xmm6 -+ vmovdqu %xmm5,64(%rsp) -+ vpshufb %xmm0,%xmm2,%xmm2 -+ vmovdqu %xmm6,80(%rsp) -+ vpshufb %xmm0,%xmm3,%xmm3 -+ vmovdqu %xmm2,96(%rsp) -+ vmovdqu %xmm3,112(%rsp) -+ -+ call _aesni_ctr32_ghash_6x -+ -+ vmovups %xmm9,-96(%rsi) -+ vmovups %xmm10,-80(%rsi) -+ vmovups %xmm11,-64(%rsi) -+ vmovups %xmm12,-48(%rsi) -+ vmovups %xmm13,-32(%rsi) -+ vmovups %xmm14,-16(%rsi) -+ -+ vpshufb (%r11),%xmm8,%xmm8 -+ vmovdqu %xmm8,-64(%r9) -+ -+ vzeroupper -+ movq -48(%rax),%r15 -+.cfi_restore %r15 -+ movq -40(%rax),%r14 -+.cfi_restore %r14 -+ movq -32(%rax),%r13 -+.cfi_restore %r13 -+ movq -24(%rax),%r12 -+.cfi_restore %r12 -+ movq -16(%rax),%rbp -+.cfi_restore %rbp -+ movq -8(%rax),%rbx -+.cfi_restore %rbx -+ leaq (%rax),%rsp -+.cfi_def_cfa_register %rsp -+.Lgcm_dec_abort: -+ movq %r10,%rax -+ .byte 0xf3,0xc3 -+.cfi_endproc -+.size aesni_gcm_decrypt,.-aesni_gcm_decrypt -+.type _aesni_ctr32_6x,@function -+.align 32 -+_aesni_ctr32_6x: -+ vmovdqu 0-128(%rcx),%xmm4 -+ vmovdqu 32(%r11),%xmm2 -+ leaq -2(%rbp),%r13 // ICP uses 10,12,14 not 9,11,13 for rounds. -+ vmovups 16-128(%rcx),%xmm15 -+ leaq 32-128(%rcx),%r12 -+ vpxor %xmm4,%xmm1,%xmm9 -+ addl $100663296,%ebx -+ jc .Lhandle_ctr32_2 -+ vpaddb %xmm2,%xmm1,%xmm10 -+ vpaddb %xmm2,%xmm10,%xmm11 -+ vpxor %xmm4,%xmm10,%xmm10 -+ vpaddb %xmm2,%xmm11,%xmm12 -+ vpxor %xmm4,%xmm11,%xmm11 -+ vpaddb %xmm2,%xmm12,%xmm13 -+ vpxor %xmm4,%xmm12,%xmm12 -+ vpaddb %xmm2,%xmm13,%xmm14 -+ vpxor %xmm4,%xmm13,%xmm13 -+ vpaddb %xmm2,%xmm14,%xmm1 -+ vpxor %xmm4,%xmm14,%xmm14 -+ jmp .Loop_ctr32 -+ -+.align 16 -+.Loop_ctr32: -+ vaesenc %xmm15,%xmm9,%xmm9 -+ vaesenc %xmm15,%xmm10,%xmm10 -+ vaesenc %xmm15,%xmm11,%xmm11 -+ vaesenc %xmm15,%xmm12,%xmm12 -+ vaesenc %xmm15,%xmm13,%xmm13 -+ vaesenc %xmm15,%xmm14,%xmm14 -+ vmovups (%r12),%xmm15 -+ leaq 16(%r12),%r12 -+ decl %r13d -+ jnz .Loop_ctr32 -+ -+ vmovdqu (%r12),%xmm3 -+ vaesenc %xmm15,%xmm9,%xmm9 -+ vpxor 0(%rdi),%xmm3,%xmm4 -+ vaesenc %xmm15,%xmm10,%xmm10 -+ vpxor 16(%rdi),%xmm3,%xmm5 -+ vaesenc %xmm15,%xmm11,%xmm11 -+ vpxor 32(%rdi),%xmm3,%xmm6 -+ vaesenc %xmm15,%xmm12,%xmm12 -+ vpxor 48(%rdi),%xmm3,%xmm8 -+ vaesenc %xmm15,%xmm13,%xmm13 -+ vpxor 64(%rdi),%xmm3,%xmm2 -+ vaesenc %xmm15,%xmm14,%xmm14 -+ vpxor 80(%rdi),%xmm3,%xmm3 -+ leaq 96(%rdi),%rdi -+ -+ vaesenclast %xmm4,%xmm9,%xmm9 -+ vaesenclast %xmm5,%xmm10,%xmm10 -+ vaesenclast %xmm6,%xmm11,%xmm11 -+ vaesenclast %xmm8,%xmm12,%xmm12 -+ vaesenclast %xmm2,%xmm13,%xmm13 -+ vaesenclast %xmm3,%xmm14,%xmm14 -+ vmovups %xmm9,0(%rsi) -+ vmovups %xmm10,16(%rsi) -+ vmovups %xmm11,32(%rsi) -+ vmovups %xmm12,48(%rsi) -+ vmovups %xmm13,64(%rsi) -+ vmovups %xmm14,80(%rsi) -+ leaq 96(%rsi),%rsi -+ -+ .byte 0xf3,0xc3 -+.align 32 -+.Lhandle_ctr32_2: -+ vpshufb %xmm0,%xmm1,%xmm6 -+ vmovdqu 48(%r11),%xmm5 -+ vpaddd 64(%r11),%xmm6,%xmm10 -+ vpaddd %xmm5,%xmm6,%xmm11 -+ vpaddd %xmm5,%xmm10,%xmm12 -+ vpshufb %xmm0,%xmm10,%xmm10 -+ vpaddd %xmm5,%xmm11,%xmm13 -+ vpshufb %xmm0,%xmm11,%xmm11 -+ vpxor %xmm4,%xmm10,%xmm10 -+ vpaddd 
%xmm5,%xmm12,%xmm14 -+ vpshufb %xmm0,%xmm12,%xmm12 -+ vpxor %xmm4,%xmm11,%xmm11 -+ vpaddd %xmm5,%xmm13,%xmm1 -+ vpshufb %xmm0,%xmm13,%xmm13 -+ vpxor %xmm4,%xmm12,%xmm12 -+ vpshufb %xmm0,%xmm14,%xmm14 -+ vpxor %xmm4,%xmm13,%xmm13 -+ vpshufb %xmm0,%xmm1,%xmm1 -+ vpxor %xmm4,%xmm14,%xmm14 -+ jmp .Loop_ctr32 -+.size _aesni_ctr32_6x,.-_aesni_ctr32_6x -+ -+.globl aesni_gcm_encrypt -+.type aesni_gcm_encrypt,@function -+.align 32 -+aesni_gcm_encrypt: -+.cfi_startproc -+ xorq %r10,%r10 -+ cmpq $288,%rdx -+ jb .Lgcm_enc_abort -+ -+ leaq (%rsp),%rax -+.cfi_def_cfa_register %rax -+ pushq %rbx -+.cfi_offset %rbx,-16 -+ pushq %rbp -+.cfi_offset %rbp,-24 -+ pushq %r12 -+.cfi_offset %r12,-32 -+ pushq %r13 -+.cfi_offset %r13,-40 -+ pushq %r14 -+.cfi_offset %r14,-48 -+ pushq %r15 -+.cfi_offset %r15,-56 -+ vzeroupper -+ -+ vmovdqu (%r8),%xmm1 -+ addq $-128,%rsp -+ movl 12(%r8),%ebx -+ leaq .Lbswap_mask(%rip),%r11 -+ leaq -128(%rcx),%r14 -+ movq $0xf80,%r15 -+ leaq 128(%rcx),%rcx -+ vmovdqu (%r11),%xmm0 -+ andq $-128,%rsp -+ movl 504-128(%rcx),%ebp // ICP has an larger offset for rounds. -+ -+ andq %r15,%r14 -+ andq %rsp,%r15 -+ subq %r14,%r15 -+ jc .Lenc_no_key_aliasing -+ cmpq $768,%r15 -+ jnc .Lenc_no_key_aliasing -+ subq %r15,%rsp -+.Lenc_no_key_aliasing: -+ -+ leaq (%rsi),%r14 -+ leaq -192(%rsi,%rdx,1),%r15 -+ shrq $4,%rdx -+ -+ call _aesni_ctr32_6x -+ vpshufb %xmm0,%xmm9,%xmm8 -+ vpshufb %xmm0,%xmm10,%xmm2 -+ vmovdqu %xmm8,112(%rsp) -+ vpshufb %xmm0,%xmm11,%xmm4 -+ vmovdqu %xmm2,96(%rsp) -+ vpshufb %xmm0,%xmm12,%xmm5 -+ vmovdqu %xmm4,80(%rsp) -+ vpshufb %xmm0,%xmm13,%xmm6 -+ vmovdqu %xmm5,64(%rsp) -+ vpshufb %xmm0,%xmm14,%xmm7 -+ vmovdqu %xmm6,48(%rsp) -+ -+ call _aesni_ctr32_6x -+ -+ vmovdqu (%r9),%xmm8 -+ leaq 32+32(%r9),%r9 -+ subq $12,%rdx -+ movq $192,%r10 -+ vpshufb %xmm0,%xmm8,%xmm8 -+ -+ call _aesni_ctr32_ghash_6x -+ vmovdqu 32(%rsp),%xmm7 -+ vmovdqu (%r11),%xmm0 -+ vmovdqu 0-32(%r9),%xmm3 -+ vpunpckhqdq %xmm7,%xmm7,%xmm1 -+ vmovdqu 32-32(%r9),%xmm15 -+ vmovups %xmm9,-96(%rsi) -+ vpshufb %xmm0,%xmm9,%xmm9 -+ vpxor %xmm7,%xmm1,%xmm1 -+ vmovups %xmm10,-80(%rsi) -+ vpshufb %xmm0,%xmm10,%xmm10 -+ vmovups %xmm11,-64(%rsi) -+ vpshufb %xmm0,%xmm11,%xmm11 -+ vmovups %xmm12,-48(%rsi) -+ vpshufb %xmm0,%xmm12,%xmm12 -+ vmovups %xmm13,-32(%rsi) -+ vpshufb %xmm0,%xmm13,%xmm13 -+ vmovups %xmm14,-16(%rsi) -+ vpshufb %xmm0,%xmm14,%xmm14 -+ vmovdqu %xmm9,16(%rsp) -+ vmovdqu 48(%rsp),%xmm6 -+ vmovdqu 16-32(%r9),%xmm0 -+ vpunpckhqdq %xmm6,%xmm6,%xmm2 -+ vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5 -+ vpxor %xmm6,%xmm2,%xmm2 -+ vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 -+ vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 -+ -+ vmovdqu 64(%rsp),%xmm9 -+ vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4 -+ vmovdqu 48-32(%r9),%xmm3 -+ vpxor %xmm5,%xmm4,%xmm4 -+ vpunpckhqdq %xmm9,%xmm9,%xmm5 -+ vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6 -+ vpxor %xmm9,%xmm5,%xmm5 -+ vpxor %xmm7,%xmm6,%xmm6 -+ vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 -+ vmovdqu 80-32(%r9),%xmm15 -+ vpxor %xmm1,%xmm2,%xmm2 -+ -+ vmovdqu 80(%rsp),%xmm1 -+ vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7 -+ vmovdqu 64-32(%r9),%xmm0 -+ vpxor %xmm4,%xmm7,%xmm7 -+ vpunpckhqdq %xmm1,%xmm1,%xmm4 -+ vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpxor %xmm6,%xmm9,%xmm9 -+ vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5 -+ vpxor %xmm2,%xmm5,%xmm5 -+ -+ vmovdqu 96(%rsp),%xmm2 -+ vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6 -+ vmovdqu 96-32(%r9),%xmm3 -+ vpxor %xmm7,%xmm6,%xmm6 -+ vpunpckhqdq %xmm2,%xmm2,%xmm7 -+ vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1 -+ vpxor %xmm2,%xmm7,%xmm7 -+ vpxor %xmm9,%xmm1,%xmm1 -+ vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4 
-+ vmovdqu 128-32(%r9),%xmm15 -+ vpxor %xmm5,%xmm4,%xmm4 -+ -+ vpxor 112(%rsp),%xmm8,%xmm8 -+ vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5 -+ vmovdqu 112-32(%r9),%xmm0 -+ vpunpckhqdq %xmm8,%xmm8,%xmm9 -+ vpxor %xmm6,%xmm5,%xmm5 -+ vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2 -+ vpxor %xmm8,%xmm9,%xmm9 -+ vpxor %xmm1,%xmm2,%xmm2 -+ vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7 -+ vpxor %xmm4,%xmm7,%xmm4 -+ -+ vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6 -+ vmovdqu 0-32(%r9),%xmm3 -+ vpunpckhqdq %xmm14,%xmm14,%xmm1 -+ vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8 -+ vpxor %xmm14,%xmm1,%xmm1 -+ vpxor %xmm5,%xmm6,%xmm5 -+ vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9 -+ vmovdqu 32-32(%r9),%xmm15 -+ vpxor %xmm2,%xmm8,%xmm7 -+ vpxor %xmm4,%xmm9,%xmm6 -+ -+ vmovdqu 16-32(%r9),%xmm0 -+ vpxor %xmm5,%xmm7,%xmm9 -+ vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4 -+ vpxor %xmm9,%xmm6,%xmm6 -+ vpunpckhqdq %xmm13,%xmm13,%xmm2 -+ vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14 -+ vpxor %xmm13,%xmm2,%xmm2 -+ vpslldq $8,%xmm6,%xmm9 -+ vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 -+ vpxor %xmm9,%xmm5,%xmm8 -+ vpsrldq $8,%xmm6,%xmm6 -+ vpxor %xmm6,%xmm7,%xmm7 -+ -+ vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5 -+ vmovdqu 48-32(%r9),%xmm3 -+ vpxor %xmm4,%xmm5,%xmm5 -+ vpunpckhqdq %xmm12,%xmm12,%xmm9 -+ vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13 -+ vpxor %xmm12,%xmm9,%xmm9 -+ vpxor %xmm14,%xmm13,%xmm13 -+ vpalignr $8,%xmm8,%xmm8,%xmm14 -+ vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 -+ vmovdqu 80-32(%r9),%xmm15 -+ vpxor %xmm1,%xmm2,%xmm2 -+ -+ vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4 -+ vmovdqu 64-32(%r9),%xmm0 -+ vpxor %xmm5,%xmm4,%xmm4 -+ vpunpckhqdq %xmm11,%xmm11,%xmm1 -+ vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12 -+ vpxor %xmm11,%xmm1,%xmm1 -+ vpxor %xmm13,%xmm12,%xmm12 -+ vxorps 16(%rsp),%xmm7,%xmm7 -+ vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9 -+ vpxor %xmm2,%xmm9,%xmm9 -+ -+ vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 -+ vxorps %xmm14,%xmm8,%xmm8 -+ -+ vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5 -+ vmovdqu 96-32(%r9),%xmm3 -+ vpxor %xmm4,%xmm5,%xmm5 -+ vpunpckhqdq %xmm10,%xmm10,%xmm2 -+ vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11 -+ vpxor %xmm10,%xmm2,%xmm2 -+ vpalignr $8,%xmm8,%xmm8,%xmm14 -+ vpxor %xmm12,%xmm11,%xmm11 -+ vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1 -+ vmovdqu 128-32(%r9),%xmm15 -+ vpxor %xmm9,%xmm1,%xmm1 -+ -+ vxorps %xmm7,%xmm14,%xmm14 -+ vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 -+ vxorps %xmm14,%xmm8,%xmm8 -+ -+ vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4 -+ vmovdqu 112-32(%r9),%xmm0 -+ vpxor %xmm5,%xmm4,%xmm4 -+ vpunpckhqdq %xmm8,%xmm8,%xmm9 -+ vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10 -+ vpxor %xmm8,%xmm9,%xmm9 -+ vpxor %xmm11,%xmm10,%xmm10 -+ vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2 -+ vpxor %xmm1,%xmm2,%xmm2 -+ -+ vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5 -+ vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7 -+ vpxor %xmm4,%xmm5,%xmm5 -+ vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6 -+ vpxor %xmm10,%xmm7,%xmm7 -+ vpxor %xmm2,%xmm6,%xmm6 -+ -+ vpxor %xmm5,%xmm7,%xmm4 -+ vpxor %xmm4,%xmm6,%xmm6 -+ vpslldq $8,%xmm6,%xmm1 -+ vmovdqu 16(%r11),%xmm3 -+ vpsrldq $8,%xmm6,%xmm6 -+ vpxor %xmm1,%xmm5,%xmm8 -+ vpxor %xmm6,%xmm7,%xmm7 -+ -+ vpalignr $8,%xmm8,%xmm8,%xmm2 -+ vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 -+ vpxor %xmm2,%xmm8,%xmm8 -+ -+ vpalignr $8,%xmm8,%xmm8,%xmm2 -+ vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 -+ vpxor %xmm7,%xmm2,%xmm2 -+ vpxor %xmm2,%xmm8,%xmm8 -+ vpshufb (%r11),%xmm8,%xmm8 -+ vmovdqu %xmm8,-64(%r9) -+ -+ vzeroupper -+ movq -48(%rax),%r15 -+.cfi_restore %r15 -+ movq -40(%rax),%r14 -+.cfi_restore %r14 -+ movq -32(%rax),%r13 -+.cfi_restore %r13 -+ movq -24(%rax),%r12 -+.cfi_restore %r12 -+ movq -16(%rax),%rbp -+.cfi_restore %rbp -+ movq -8(%rax),%rbx -+.cfi_restore %rbx -+ leaq (%rax),%rsp 
-+.cfi_def_cfa_register %rsp -+.Lgcm_enc_abort: -+ movq %r10,%rax -+ .byte 0xf3,0xc3 -+.cfi_endproc -+.size aesni_gcm_encrypt,.-aesni_gcm_encrypt -+ -+/* Some utility routines */ -+ -+/* -+ * clear all fpu registers -+ * void clear_fpu_regs_avx(void); -+ */ -+.globl clear_fpu_regs_avx -+.type clear_fpu_regs_avx,@function -+.align 32 -+clear_fpu_regs_avx: -+ vzeroall -+ ret -+.size clear_fpu_regs_avx,.-clear_fpu_regs_avx -+ -+/* -+ * void gcm_xor_avx(const uint8_t *src, uint8_t *dst); -+ * -+ * XORs one pair of unaligned 128-bit blocks from `src' and `dst' and -+ * stores the result at `dst'. The XOR is performed using FPU registers, -+ * so make sure FPU state is saved when running this in the kernel. -+ */ -+.globl gcm_xor_avx -+.type gcm_xor_avx,@function -+.align 32 -+gcm_xor_avx: -+ movdqu (%rdi), %xmm0 -+ movdqu (%rsi), %xmm1 -+ pxor %xmm1, %xmm0 -+ movdqu %xmm0, (%rsi) -+ ret -+.size gcm_xor_avx,.-gcm_xor_avx -+ -+/* -+ * Toggle a boolean_t value atomically and return the new value. -+ * boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); -+ */ -+.globl atomic_toggle_boolean_nv -+.type atomic_toggle_boolean_nv,@function -+.align 32 -+atomic_toggle_boolean_nv: -+ xorl %eax, %eax -+ lock -+ xorl $1, (%rdi) -+ jz 1f -+ movl $1, %eax -+1: -+ ret -+.size atomic_toggle_boolean_nv,.-atomic_toggle_boolean_nv -+ -+.align 64 -+.Lbswap_mask: -+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 -+.Lpoly: -+.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 -+.Lone_msb: -+.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 -+.Ltwo_lsb: -+.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -+.Lone_lsb: -+.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -+.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -+.align 64 -+ -+/* Mark the stack non-executable. */ -+#if defined(__linux__) && defined(__ELF__) -+.section .note.GNU-stack,"",%progbits -+#endif -+ -+#endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_AES) ... */ -Index: zfs-linux-0.8.3/module/icp/asm-x86_64/modes/ghash-x86_64.S -=================================================================== ---- /dev/null -+++ zfs-linux-0.8.3/module/icp/asm-x86_64/modes/ghash-x86_64.S -@@ -0,0 +1,714 @@ -+# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. -+# -+# Licensed under the Apache License 2.0 (the "License"). You may not use -+# this file except in compliance with the License. You can obtain a copy -+# in the file LICENSE in the source distribution or at -+# https://www.openssl.org/source/license.html -+ -+# -+# ==================================================================== -+# Written by Andy Polyakov for the OpenSSL -+# project. The module is, however, dual licensed under OpenSSL and -+# CRYPTOGAMS licenses depending on where you obtain it. For further -+# details see http://www.openssl.org/~appro/cryptogams/. -+# ==================================================================== -+# -+# March, June 2010 -+# -+# The module implements "4-bit" GCM GHASH function and underlying -+# single multiplication operation in GF(2^128). "4-bit" means that -+# it uses 256 bytes per-key table [+128 bytes shared table]. GHASH -+# function features so called "528B" variant utilizing additional -+# 256+16 bytes of per-key storage [+512 bytes shared table]. 
-+# Performance results are for this streamed GHASH subroutine and are
-+# expressed in cycles per processed byte, less is better:
-+#
-+#		gcc 3.4.x(*)	assembler
-+#
-+# P4		28.6		14.0		+100%
-+# Opteron	19.3		7.7		+150%
-+# Core2		17.8		8.1(**)		+120%
-+# Atom		31.6		16.8		+88%
-+# VIA Nano	21.8		10.1		+115%
-+#
-+# (*)	comparison is not completely fair, because C results are
-+#	for vanilla "256B" implementation, while assembler results
-+#	are for "528B";-)
-+# (**)	it's a mystery [to me] why the Core2 result is not the same as for
-+#	Opteron;
-+
-+# May 2010
-+#
-+# Add PCLMULQDQ version performing at 2.02 cycles per processed byte.
-+# See ghash-x86.pl for background information and details about coding
-+# techniques.
-+#
-+# Special thanks to David Woodhouse for providing access to a
-+# Westmere-based system on behalf of Intel Open Source Technology Centre.
-+
-+# December 2012
-+#
-+# Overhaul: aggregate Karatsuba post-processing, improve ILP in
-+# reduction_alg9, increase reduction aggregate factor to 4x. As for
-+# the latter, ghash-x86.pl discusses that it makes lesser sense to
-+# increase aggregate factor. Then why increase here? Critical path
-+# consists of 3 independent pclmulqdq instructions, Karatsuba post-
-+# processing and reduction. "On top" of this we lay down aggregated
-+# multiplication operations, triplets of independent pclmulqdq's. As
-+# issue rate for pclmulqdq is limited, it makes lesser sense to
-+# aggregate more multiplications than it takes to perform remaining
-+# non-multiplication operations. 2x is near-optimal coefficient for
-+# contemporary Intel CPUs (therefore modest improvement coefficient),
-+# but not for Bulldozer. Latter is because logical SIMD operations
-+# are twice as slow in comparison to Intel, so that critical path is
-+# longer. A CPU with higher pclmulqdq issue rate would also benefit
-+# from higher aggregate factor...
-+#
-+# Westmere	1.78(+13%)
-+# Sandy Bridge	1.80(+8%)
-+# Ivy Bridge	1.80(+7%)
-+# Haswell	0.55(+93%) (if system doesn't support AVX)
-+# Broadwell	0.45(+110%)(if system doesn't support AVX)
-+# Skylake	0.44(+110%)(if system doesn't support AVX)
-+# Bulldozer	1.49(+27%)
-+# Silvermont	2.88(+13%)
-+# Knights L	2.12(-)    (if system doesn't support AVX)
-+# Goldmont	1.08(+24%)
-+
-+# March 2013
-+#
-+# ... 8x aggregate factor AVX code path is using reduction algorithm
-+# suggested by Shay Gueron[1]. Even though contemporary AVX-capable
-+# CPUs such as Sandy and Ivy Bridge can execute it, the code performs
-+# sub-optimally in comparison to above mentioned version. But thanks
-+# to Ilya Albrekht and Max Locktyukhin of Intel Corp. we knew that
-+# it performs in 0.41 cycles per byte on Haswell processor, in
-+# 0.29 on Broadwell, and in 0.36 on Skylake.
-+#
-+# Knights Landing achieves 1.09 cpb.
-+#
-+# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
-+
-+# Generated once from
-+# https://github.com/openssl/openssl/blob/5ffc3324/crypto/modes/asm/ghash-x86_64.pl
-+# and modified for ICP. Modifications are kept to a bare minimum to ease later
-+# upstream merges.
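
[For readers following the patch: the routines below accelerate the GHASH multiply in GF(2^128). As a point of reference, here is a minimal, unoptimized C sketch of that multiply, using the bit-at-a-time shift-and-xor algorithm from NIST SP 800-38D; the gf128_t type and the function names are illustrative only and are not part of the patch or of ICP.]

    #include <stdint.h>
    #include <stdio.h>

    /* One 128-bit GHASH element, stored as two big-endian 64-bit halves. */
    typedef struct { uint64_t hi, lo; } gf128_t;

    /*
     * z = x * y in GF(2^128) with the GCM polynomial
     * x^128 + x^7 + x^2 + x + 1 (R = 0xe1 followed by 15 zero bytes),
     * processing the bits of x most significant first. One shift and one
     * conditional xor per bit is exactly the work the 4-bit tables and
     * the pclmulqdq paths in this file eliminate.
     */
    static gf128_t
    gf128_mul(gf128_t x, gf128_t y)
    {
        gf128_t z = { 0, 0 };

        for (int i = 0; i < 128; i++) {
            /* Test bit i of x, MSB first. */
            uint64_t bit = (i < 64) ? (x.hi >> (63 - i)) & 1 :
                (x.lo >> (127 - i)) & 1;
            if (bit) {
                z.hi ^= y.hi;
                z.lo ^= y.lo;
            }
            /* y = y >> 1, reducing by R on carry-out. */
            uint64_t carry = y.lo & 1;
            y.lo = (y.lo >> 1) | (y.hi << 63);
            y.hi >>= 1;
            if (carry)
                y.hi ^= 0xe100000000000000ULL;
        }
        return (z);
    }

    /* GHASH chaining step: Y = (Y ^ X) * H, applied once per 16-byte block. */
    static gf128_t
    ghash_update(gf128_t Y, gf128_t X, gf128_t H)
    {
        Y.hi ^= X.hi;
        Y.lo ^= X.lo;
        return (gf128_mul(Y, H));
    }

    int
    main(void)
    {
        /* H for the all-zero AES-128 key (NIST GCM test case 1). */
        gf128_t H = { 0x66e94bd4ef8a2c3bULL, 0x884cfa59ca342b2eULL };
        gf128_t Y = { 0, 0 }, X = { 0x0123456789abcdefULL, 0 };

        Y = ghash_update(Y, X, H);
        printf("%016llx%016llx\n", (unsigned long long)Y.hi,
            (unsigned long long)Y.lo);
        return (0);
    }

This bit loop is also why gcm_init_htab_avx() exists: it precomputes multiples of H into the Htable once per key, so that gcm_ghash_avx() below can fold several blocks per reduction, matching the aggregate-factor discussion in the notes above.
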
-+ -+#if defined(__x86_64__) && defined(HAVE_AVX) && \ -+ defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) -+ -+.text -+ -+.globl gcm_gmult_clmul -+.type gcm_gmult_clmul,@function -+.align 16 -+gcm_gmult_clmul: -+.cfi_startproc -+.L_gmult_clmul: -+ movdqu (%rdi),%xmm0 -+ movdqa .Lbswap_mask(%rip),%xmm5 -+ movdqu (%rsi),%xmm2 -+ movdqu 32(%rsi),%xmm4 -+.byte 102,15,56,0,197 -+ movdqa %xmm0,%xmm1 -+ pshufd $78,%xmm0,%xmm3 -+ pxor %xmm0,%xmm3 -+.byte 102,15,58,68,194,0 -+.byte 102,15,58,68,202,17 -+.byte 102,15,58,68,220,0 -+ pxor %xmm0,%xmm3 -+ pxor %xmm1,%xmm3 -+ -+ movdqa %xmm3,%xmm4 -+ psrldq $8,%xmm3 -+ pslldq $8,%xmm4 -+ pxor %xmm3,%xmm1 -+ pxor %xmm4,%xmm0 -+ -+ movdqa %xmm0,%xmm4 -+ movdqa %xmm0,%xmm3 -+ psllq $5,%xmm0 -+ pxor %xmm0,%xmm3 -+ psllq $1,%xmm0 -+ pxor %xmm3,%xmm0 -+ psllq $57,%xmm0 -+ movdqa %xmm0,%xmm3 -+ pslldq $8,%xmm0 -+ psrldq $8,%xmm3 -+ pxor %xmm4,%xmm0 -+ pxor %xmm3,%xmm1 -+ -+ -+ movdqa %xmm0,%xmm4 -+ psrlq $1,%xmm0 -+ pxor %xmm4,%xmm1 -+ pxor %xmm0,%xmm4 -+ psrlq $5,%xmm0 -+ pxor %xmm4,%xmm0 -+ psrlq $1,%xmm0 -+ pxor %xmm1,%xmm0 -+.byte 102,15,56,0,197 -+ movdqu %xmm0,(%rdi) -+ .byte 0xf3,0xc3 -+.cfi_endproc -+.size gcm_gmult_clmul,.-gcm_gmult_clmul -+ -+.globl gcm_init_htab_avx -+.type gcm_init_htab_avx,@function -+.align 32 -+gcm_init_htab_avx: -+.cfi_startproc -+ vzeroupper -+ -+ vmovdqu (%rsi),%xmm2 -+ // KCF/ICP stores H in network byte order with the hi qword first -+ // so we need to swap all bytes, not the 2 qwords. -+ vmovdqu .Lbswap_mask(%rip),%xmm4 -+ vpshufb %xmm4,%xmm2,%xmm2 -+ -+ -+ vpshufd $255,%xmm2,%xmm4 -+ vpsrlq $63,%xmm2,%xmm3 -+ vpsllq $1,%xmm2,%xmm2 -+ vpxor %xmm5,%xmm5,%xmm5 -+ vpcmpgtd %xmm4,%xmm5,%xmm5 -+ vpslldq $8,%xmm3,%xmm3 -+ vpor %xmm3,%xmm2,%xmm2 -+ -+ -+ vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5 -+ vpxor %xmm5,%xmm2,%xmm2 -+ -+ vpunpckhqdq %xmm2,%xmm2,%xmm6 -+ vmovdqa %xmm2,%xmm0 -+ vpxor %xmm2,%xmm6,%xmm6 -+ movq $4,%r10 -+ jmp .Linit_start_avx -+.align 32 -+.Linit_loop_avx: -+ vpalignr $8,%xmm3,%xmm4,%xmm5 -+ vmovdqu %xmm5,-16(%rdi) -+ vpunpckhqdq %xmm0,%xmm0,%xmm3 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 -+ vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 -+ vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 -+ vpxor %xmm0,%xmm1,%xmm4 -+ vpxor %xmm4,%xmm3,%xmm3 -+ -+ vpslldq $8,%xmm3,%xmm4 -+ vpsrldq $8,%xmm3,%xmm3 -+ vpxor %xmm4,%xmm0,%xmm0 -+ vpxor %xmm3,%xmm1,%xmm1 -+ vpsllq $57,%xmm0,%xmm3 -+ vpsllq $62,%xmm0,%xmm4 -+ vpxor %xmm3,%xmm4,%xmm4 -+ vpsllq $63,%xmm0,%xmm3 -+ vpxor %xmm3,%xmm4,%xmm4 -+ vpslldq $8,%xmm4,%xmm3 -+ vpsrldq $8,%xmm4,%xmm4 -+ vpxor %xmm3,%xmm0,%xmm0 -+ vpxor %xmm4,%xmm1,%xmm1 -+ -+ vpsrlq $1,%xmm0,%xmm4 -+ vpxor %xmm0,%xmm1,%xmm1 -+ vpxor %xmm4,%xmm0,%xmm0 -+ vpsrlq $5,%xmm4,%xmm4 -+ vpxor %xmm4,%xmm0,%xmm0 -+ vpsrlq $1,%xmm0,%xmm0 -+ vpxor %xmm1,%xmm0,%xmm0 -+.Linit_start_avx: -+ vmovdqa %xmm0,%xmm5 -+ vpunpckhqdq %xmm0,%xmm0,%xmm3 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 -+ vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 -+ vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 -+ vpxor %xmm0,%xmm1,%xmm4 -+ vpxor %xmm4,%xmm3,%xmm3 -+ -+ vpslldq $8,%xmm3,%xmm4 -+ vpsrldq $8,%xmm3,%xmm3 -+ vpxor %xmm4,%xmm0,%xmm0 -+ vpxor %xmm3,%xmm1,%xmm1 -+ vpsllq $57,%xmm0,%xmm3 -+ vpsllq $62,%xmm0,%xmm4 -+ vpxor %xmm3,%xmm4,%xmm4 -+ vpsllq $63,%xmm0,%xmm3 -+ vpxor %xmm3,%xmm4,%xmm4 -+ vpslldq $8,%xmm4,%xmm3 -+ vpsrldq $8,%xmm4,%xmm4 -+ vpxor %xmm3,%xmm0,%xmm0 -+ vpxor %xmm4,%xmm1,%xmm1 -+ -+ vpsrlq $1,%xmm0,%xmm4 -+ vpxor %xmm0,%xmm1,%xmm1 -+ vpxor %xmm4,%xmm0,%xmm0 -+ vpsrlq $5,%xmm4,%xmm4 -+ vpxor %xmm4,%xmm0,%xmm0 -+ vpsrlq 
$1,%xmm0,%xmm0 -+ vpxor %xmm1,%xmm0,%xmm0 -+ vpshufd $78,%xmm5,%xmm3 -+ vpshufd $78,%xmm0,%xmm4 -+ vpxor %xmm5,%xmm3,%xmm3 -+ vmovdqu %xmm5,0(%rdi) -+ vpxor %xmm0,%xmm4,%xmm4 -+ vmovdqu %xmm0,16(%rdi) -+ leaq 48(%rdi),%rdi -+ subq $1,%r10 -+ jnz .Linit_loop_avx -+ -+ vpalignr $8,%xmm4,%xmm3,%xmm5 -+ vmovdqu %xmm5,-16(%rdi) -+ -+ vzeroupper -+ .byte 0xf3,0xc3 -+.cfi_endproc -+.size gcm_init_htab_avx,.-gcm_init_htab_avx -+ -+.globl gcm_gmult_avx -+.type gcm_gmult_avx,@function -+.align 32 -+gcm_gmult_avx: -+.cfi_startproc -+ jmp .L_gmult_clmul -+.cfi_endproc -+.size gcm_gmult_avx,.-gcm_gmult_avx -+.globl gcm_ghash_avx -+.type gcm_ghash_avx,@function -+.align 32 -+gcm_ghash_avx: -+.cfi_startproc -+ vzeroupper -+ -+ vmovdqu (%rdi),%xmm10 -+ leaq .L0x1c2_polynomial(%rip),%r10 -+ leaq 64(%rsi),%rsi -+ vmovdqu .Lbswap_mask(%rip),%xmm13 -+ vpshufb %xmm13,%xmm10,%xmm10 -+ cmpq $0x80,%rcx -+ jb .Lshort_avx -+ subq $0x80,%rcx -+ -+ vmovdqu 112(%rdx),%xmm14 -+ vmovdqu 0-64(%rsi),%xmm6 -+ vpshufb %xmm13,%xmm14,%xmm14 -+ vmovdqu 32-64(%rsi),%xmm7 -+ -+ vpunpckhqdq %xmm14,%xmm14,%xmm9 -+ vmovdqu 96(%rdx),%xmm15 -+ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 -+ vpxor %xmm14,%xmm9,%xmm9 -+ vpshufb %xmm13,%xmm15,%xmm15 -+ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 -+ vmovdqu 16-64(%rsi),%xmm6 -+ vpunpckhqdq %xmm15,%xmm15,%xmm8 -+ vmovdqu 80(%rdx),%xmm14 -+ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 -+ vpxor %xmm15,%xmm8,%xmm8 -+ -+ vpshufb %xmm13,%xmm14,%xmm14 -+ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 -+ vpunpckhqdq %xmm14,%xmm14,%xmm9 -+ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 -+ vmovdqu 48-64(%rsi),%xmm6 -+ vpxor %xmm14,%xmm9,%xmm9 -+ vmovdqu 64(%rdx),%xmm15 -+ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 -+ vmovdqu 80-64(%rsi),%xmm7 -+ -+ vpshufb %xmm13,%xmm15,%xmm15 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpunpckhqdq %xmm15,%xmm15,%xmm8 -+ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 -+ vmovdqu 64-64(%rsi),%xmm6 -+ vpxor %xmm2,%xmm5,%xmm5 -+ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 -+ vpxor %xmm15,%xmm8,%xmm8 -+ -+ vmovdqu 48(%rdx),%xmm14 -+ vpxor %xmm3,%xmm0,%xmm0 -+ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 -+ vpxor %xmm4,%xmm1,%xmm1 -+ vpshufb %xmm13,%xmm14,%xmm14 -+ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 -+ vmovdqu 96-64(%rsi),%xmm6 -+ vpxor %xmm5,%xmm2,%xmm2 -+ vpunpckhqdq %xmm14,%xmm14,%xmm9 -+ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 -+ vmovdqu 128-64(%rsi),%xmm7 -+ vpxor %xmm14,%xmm9,%xmm9 -+ -+ vmovdqu 32(%rdx),%xmm15 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpshufb %xmm13,%xmm15,%xmm15 -+ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 -+ vmovdqu 112-64(%rsi),%xmm6 -+ vpxor %xmm2,%xmm5,%xmm5 -+ vpunpckhqdq %xmm15,%xmm15,%xmm8 -+ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 -+ vpxor %xmm15,%xmm8,%xmm8 -+ -+ vmovdqu 16(%rdx),%xmm14 -+ vpxor %xmm3,%xmm0,%xmm0 -+ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 -+ vpxor %xmm4,%xmm1,%xmm1 -+ vpshufb %xmm13,%xmm14,%xmm14 -+ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 -+ vmovdqu 144-64(%rsi),%xmm6 -+ vpxor %xmm5,%xmm2,%xmm2 -+ vpunpckhqdq %xmm14,%xmm14,%xmm9 -+ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 -+ vmovdqu 176-64(%rsi),%xmm7 -+ vpxor %xmm14,%xmm9,%xmm9 -+ -+ vmovdqu (%rdx),%xmm15 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpshufb %xmm13,%xmm15,%xmm15 -+ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 -+ vmovdqu 160-64(%rsi),%xmm6 -+ vpxor %xmm2,%xmm5,%xmm5 -+ vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 -+ -+ leaq 128(%rdx),%rdx -+ cmpq $0x80,%rcx -+ jb .Ltail_avx -+ -+ vpxor %xmm10,%xmm15,%xmm15 -+ subq $0x80,%rcx -+ jmp 
.Loop8x_avx -+ -+.align 32 -+.Loop8x_avx: -+ vpunpckhqdq %xmm15,%xmm15,%xmm8 -+ vmovdqu 112(%rdx),%xmm14 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpxor %xmm15,%xmm8,%xmm8 -+ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10 -+ vpshufb %xmm13,%xmm14,%xmm14 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11 -+ vmovdqu 0-64(%rsi),%xmm6 -+ vpunpckhqdq %xmm14,%xmm14,%xmm9 -+ vpxor %xmm2,%xmm5,%xmm5 -+ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12 -+ vmovdqu 32-64(%rsi),%xmm7 -+ vpxor %xmm14,%xmm9,%xmm9 -+ -+ vmovdqu 96(%rdx),%xmm15 -+ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 -+ vpxor %xmm3,%xmm10,%xmm10 -+ vpshufb %xmm13,%xmm15,%xmm15 -+ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 -+ vxorps %xmm4,%xmm11,%xmm11 -+ vmovdqu 16-64(%rsi),%xmm6 -+ vpunpckhqdq %xmm15,%xmm15,%xmm8 -+ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 -+ vpxor %xmm5,%xmm12,%xmm12 -+ vxorps %xmm15,%xmm8,%xmm8 -+ -+ vmovdqu 80(%rdx),%xmm14 -+ vpxor %xmm10,%xmm12,%xmm12 -+ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 -+ vpxor %xmm11,%xmm12,%xmm12 -+ vpslldq $8,%xmm12,%xmm9 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 -+ vpsrldq $8,%xmm12,%xmm12 -+ vpxor %xmm9,%xmm10,%xmm10 -+ vmovdqu 48-64(%rsi),%xmm6 -+ vpshufb %xmm13,%xmm14,%xmm14 -+ vxorps %xmm12,%xmm11,%xmm11 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpunpckhqdq %xmm14,%xmm14,%xmm9 -+ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 -+ vmovdqu 80-64(%rsi),%xmm7 -+ vpxor %xmm14,%xmm9,%xmm9 -+ vpxor %xmm2,%xmm5,%xmm5 -+ -+ vmovdqu 64(%rdx),%xmm15 -+ vpalignr $8,%xmm10,%xmm10,%xmm12 -+ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 -+ vpshufb %xmm13,%xmm15,%xmm15 -+ vpxor %xmm3,%xmm0,%xmm0 -+ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 -+ vmovdqu 64-64(%rsi),%xmm6 -+ vpunpckhqdq %xmm15,%xmm15,%xmm8 -+ vpxor %xmm4,%xmm1,%xmm1 -+ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 -+ vxorps %xmm15,%xmm8,%xmm8 -+ vpxor %xmm5,%xmm2,%xmm2 -+ -+ vmovdqu 48(%rdx),%xmm14 -+ vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 -+ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 -+ vpshufb %xmm13,%xmm14,%xmm14 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 -+ vmovdqu 96-64(%rsi),%xmm6 -+ vpunpckhqdq %xmm14,%xmm14,%xmm9 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 -+ vmovdqu 128-64(%rsi),%xmm7 -+ vpxor %xmm14,%xmm9,%xmm9 -+ vpxor %xmm2,%xmm5,%xmm5 -+ -+ vmovdqu 32(%rdx),%xmm15 -+ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 -+ vpshufb %xmm13,%xmm15,%xmm15 -+ vpxor %xmm3,%xmm0,%xmm0 -+ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 -+ vmovdqu 112-64(%rsi),%xmm6 -+ vpunpckhqdq %xmm15,%xmm15,%xmm8 -+ vpxor %xmm4,%xmm1,%xmm1 -+ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 -+ vpxor %xmm15,%xmm8,%xmm8 -+ vpxor %xmm5,%xmm2,%xmm2 -+ vxorps %xmm12,%xmm10,%xmm10 -+ -+ vmovdqu 16(%rdx),%xmm14 -+ vpalignr $8,%xmm10,%xmm10,%xmm12 -+ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 -+ vpshufb %xmm13,%xmm14,%xmm14 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 -+ vmovdqu 144-64(%rsi),%xmm6 -+ vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 -+ vxorps %xmm11,%xmm12,%xmm12 -+ vpunpckhqdq %xmm14,%xmm14,%xmm9 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 -+ vmovdqu 176-64(%rsi),%xmm7 -+ vpxor %xmm14,%xmm9,%xmm9 -+ vpxor %xmm2,%xmm5,%xmm5 -+ -+ vmovdqu (%rdx),%xmm15 -+ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 -+ vpshufb %xmm13,%xmm15,%xmm15 -+ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 -+ vmovdqu 160-64(%rsi),%xmm6 -+ vpxor %xmm12,%xmm15,%xmm15 -+ vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 -+ vpxor %xmm10,%xmm15,%xmm15 -+ -+ leaq 128(%rdx),%rdx -+ subq $0x80,%rcx -+ jnc .Loop8x_avx -+ -+ addq $0x80,%rcx -+ jmp .Ltail_no_xor_avx -+ -+.align 32 -+.Lshort_avx: -+ vmovdqu -16(%rdx,%rcx,1),%xmm14 -+ leaq (%rdx,%rcx,1),%rdx -+ 
vmovdqu 0-64(%rsi),%xmm6 -+ vmovdqu 32-64(%rsi),%xmm7 -+ vpshufb %xmm13,%xmm14,%xmm15 -+ -+ vmovdqa %xmm0,%xmm3 -+ vmovdqa %xmm1,%xmm4 -+ vmovdqa %xmm2,%xmm5 -+ subq $0x10,%rcx -+ jz .Ltail_avx -+ -+ vpunpckhqdq %xmm15,%xmm15,%xmm8 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 -+ vpxor %xmm15,%xmm8,%xmm8 -+ vmovdqu -32(%rdx),%xmm14 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 -+ vmovdqu 16-64(%rsi),%xmm6 -+ vpshufb %xmm13,%xmm14,%xmm15 -+ vpxor %xmm2,%xmm5,%xmm5 -+ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 -+ vpsrldq $8,%xmm7,%xmm7 -+ subq $0x10,%rcx -+ jz .Ltail_avx -+ -+ vpunpckhqdq %xmm15,%xmm15,%xmm8 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 -+ vpxor %xmm15,%xmm8,%xmm8 -+ vmovdqu -48(%rdx),%xmm14 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 -+ vmovdqu 48-64(%rsi),%xmm6 -+ vpshufb %xmm13,%xmm14,%xmm15 -+ vpxor %xmm2,%xmm5,%xmm5 -+ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 -+ vmovdqu 80-64(%rsi),%xmm7 -+ subq $0x10,%rcx -+ jz .Ltail_avx -+ -+ vpunpckhqdq %xmm15,%xmm15,%xmm8 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 -+ vpxor %xmm15,%xmm8,%xmm8 -+ vmovdqu -64(%rdx),%xmm14 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 -+ vmovdqu 64-64(%rsi),%xmm6 -+ vpshufb %xmm13,%xmm14,%xmm15 -+ vpxor %xmm2,%xmm5,%xmm5 -+ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 -+ vpsrldq $8,%xmm7,%xmm7 -+ subq $0x10,%rcx -+ jz .Ltail_avx -+ -+ vpunpckhqdq %xmm15,%xmm15,%xmm8 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 -+ vpxor %xmm15,%xmm8,%xmm8 -+ vmovdqu -80(%rdx),%xmm14 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 -+ vmovdqu 96-64(%rsi),%xmm6 -+ vpshufb %xmm13,%xmm14,%xmm15 -+ vpxor %xmm2,%xmm5,%xmm5 -+ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 -+ vmovdqu 128-64(%rsi),%xmm7 -+ subq $0x10,%rcx -+ jz .Ltail_avx -+ -+ vpunpckhqdq %xmm15,%xmm15,%xmm8 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 -+ vpxor %xmm15,%xmm8,%xmm8 -+ vmovdqu -96(%rdx),%xmm14 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 -+ vmovdqu 112-64(%rsi),%xmm6 -+ vpshufb %xmm13,%xmm14,%xmm15 -+ vpxor %xmm2,%xmm5,%xmm5 -+ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 -+ vpsrldq $8,%xmm7,%xmm7 -+ subq $0x10,%rcx -+ jz .Ltail_avx -+ -+ vpunpckhqdq %xmm15,%xmm15,%xmm8 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 -+ vpxor %xmm15,%xmm8,%xmm8 -+ vmovdqu -112(%rdx),%xmm14 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 -+ vmovdqu 144-64(%rsi),%xmm6 -+ vpshufb %xmm13,%xmm14,%xmm15 -+ vpxor %xmm2,%xmm5,%xmm5 -+ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 -+ vmovq 184-64(%rsi),%xmm7 -+ subq $0x10,%rcx -+ jmp .Ltail_avx -+ -+.align 32 -+.Ltail_avx: -+ vpxor %xmm10,%xmm15,%xmm15 -+.Ltail_no_xor_avx: -+ vpunpckhqdq %xmm15,%xmm15,%xmm8 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 -+ vpxor %xmm15,%xmm8,%xmm8 -+ vpxor %xmm1,%xmm4,%xmm4 -+ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 -+ vpxor %xmm2,%xmm5,%xmm5 -+ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 -+ -+ vmovdqu (%r10),%xmm12 -+ -+ vpxor %xmm0,%xmm3,%xmm10 -+ vpxor %xmm1,%xmm4,%xmm11 -+ vpxor %xmm2,%xmm5,%xmm5 -+ -+ vpxor %xmm10,%xmm5,%xmm5 -+ vpxor %xmm11,%xmm5,%xmm5 -+ vpslldq $8,%xmm5,%xmm9 -+ vpsrldq $8,%xmm5,%xmm5 -+ vpxor %xmm9,%xmm10,%xmm10 -+ vpxor %xmm5,%xmm11,%xmm11 -+ -+ vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 -+ vpalignr $8,%xmm10,%xmm10,%xmm10 -+ vpxor %xmm9,%xmm10,%xmm10 -+ -+ vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 -+ vpalignr $8,%xmm10,%xmm10,%xmm10 -+ vpxor %xmm11,%xmm10,%xmm10 -+ vpxor 
%xmm9,%xmm10,%xmm10 -+ -+ cmpq $0,%rcx -+ jne .Lshort_avx -+ -+ vpshufb %xmm13,%xmm10,%xmm10 -+ vmovdqu %xmm10,(%rdi) -+ vzeroupper -+ .byte 0xf3,0xc3 -+.cfi_endproc -+.size gcm_ghash_avx,.-gcm_ghash_avx -+.align 64 -+.Lbswap_mask: -+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 -+.L0x1c2_polynomial: -+.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 -+.L7_mask: -+.long 7,0,7,0 -+.L7_mask_poly: -+.long 7,0,450,0 -+.align 64 -+.type .Lrem_4bit,@object -+.Lrem_4bit: -+.long 0,0,0,471859200,0,943718400,0,610271232 -+.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 -+.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 -+.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 -+.type .Lrem_8bit,@object -+.Lrem_8bit: -+.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E -+.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E -+.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E -+.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E -+.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E -+.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E -+.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E -+.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E -+.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE -+.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE -+.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE -+.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE -+.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E -+.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E -+.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE -+.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE -+.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E -+.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E -+.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E -+.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E -+.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E -+.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E -+.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E -+.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E -+.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE -+.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE -+.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE -+.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE -+.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E -+.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E -+.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE -+.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE -+ -+.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -+.align 64 -+ -+/* Mark the stack non-executable. */ -+#if defined(__linux__) && defined(__ELF__) -+.section .note.GNU-stack,"",%progbits -+#endif -+ -+#endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_AES) ... 
*/ -Index: zfs-linux-0.8.3/module/icp/include/aes/aes_impl.h -=================================================================== ---- zfs-linux-0.8.3.orig/module/icp/include/aes/aes_impl.h -+++ zfs-linux-0.8.3/module/icp/include/aes/aes_impl.h -@@ -107,6 +107,11 @@ typedef union { - } aes_ks_t; - - typedef struct aes_impl_ops aes_impl_ops_t; -+ -+/* -+ * The absolute offset of the encr_ks (0) and the nr (504) fields are hard -+ * coded in aesni-gcm-x86_64, so please don't change (or adjust accordingly). -+ */ - typedef struct aes_key aes_key_t; - struct aes_key { - aes_ks_t encr_ks; /* encryption key schedule */ -Index: zfs-linux-0.8.3/module/icp/include/modes/modes.h -=================================================================== ---- zfs-linux-0.8.3.orig/module/icp/include/modes/modes.h -+++ zfs-linux-0.8.3/module/icp/include/modes/modes.h -@@ -34,6 +34,16 @@ extern "C" { - #include - #include - -+/* -+ * Does the build chain support all instructions needed for the GCM assembler -+ * routines. AVX support should imply AES-NI and PCLMULQDQ, but make sure -+ * anyhow. -+ */ -+#if defined(__x86_64__) && defined(HAVE_AVX) && \ -+ defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) && defined(HAVE_MOVBE) -+#define CAN_USE_GCM_ASM -+#endif -+ - #define ECB_MODE 0x00000002 - #define CBC_MODE 0x00000004 - #define CTR_MODE 0x00000008 -@@ -189,13 +199,17 @@ typedef struct ccm_ctx { - * - * gcm_H: Subkey. - * -+ * gcm_Htable: Pre-computed and pre-shifted H, H^2, ... H^6 for the -+ * Karatsuba Algorithm in host byte order. -+ * - * gcm_J0: Pre-counter block generated from the IV. - * - * gcm_len_a_len_c: 64-bit representations of the bit lengths of - * AAD and ciphertext. - * -- * gcm_kmflag: Current value of kmflag. Used only for allocating -- * the plaintext buffer during decryption. -+ * gcm_kmflag: Current value of kmflag. Used for allocating -+ * the plaintext buffer during decryption and a -+ * gcm_avx_chunk_size'd buffer for avx enabled encryption. - */ - typedef struct gcm_ctx { - struct common_ctx gcm_common; -@@ -203,12 +217,23 @@ typedef struct gcm_ctx { - size_t gcm_processed_data_len; - size_t gcm_pt_buf_len; - uint32_t gcm_tmp[4]; -+ /* -+ * The relative positions of gcm_ghash, gcm_H and pre-computed -+ * gcm_Htable are hard coded in aesni-gcm-x86_64.S and ghash-x86_64.S, -+ * so please don't change (or adjust accordingly). 
-+ */ - uint64_t gcm_ghash[2]; - uint64_t gcm_H[2]; -+#ifdef CAN_USE_GCM_ASM -+ uint64_t gcm_Htable[12][2]; -+#endif - uint64_t gcm_J0[2]; - uint64_t gcm_len_a_len_c[2]; - uint8_t *gcm_pt_buf; - int gcm_kmflag; -+#ifdef CAN_USE_GCM_ASM -+ boolean_t gcm_use_avx; -+#endif - } gcm_ctx_t; - - #define gcm_keysched gcm_common.cc_keysched -Index: zfs-linux-0.8.3/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh -=================================================================== ---- zfs-linux-0.8.3.orig/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh -+++ zfs-linux-0.8.3/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh -@@ -53,7 +53,7 @@ set -A ENCRYPTION_ALGS \ - "encryption=aes-256-gcm" - - set -A ENCRYPTION_PROPS \ -- "encryption=aes-256-ccm" \ -+ "encryption=aes-256-gcm" \ - "encryption=aes-128-ccm" \ - "encryption=aes-192-ccm" \ - "encryption=aes-256-ccm" \ -Index: zfs-linux-0.8.3/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh -=================================================================== ---- zfs-linux-0.8.3.orig/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh -+++ zfs-linux-0.8.3/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh -@@ -48,7 +48,7 @@ set -A ENCRYPTION_ALGS "encryption=on" \ - "encryption=aes-192-gcm" \ - "encryption=aes-256-gcm" - --set -A ENCRYPTION_PROPS "encryption=aes-256-ccm" \ -+set -A ENCRYPTION_PROPS "encryption=aes-256-gcm" \ - "encryption=aes-128-ccm" \ - "encryption=aes-192-ccm" \ - "encryption=aes-256-ccm" \ -Index: zfs-linux-0.8.3/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh -=================================================================== ---- zfs-linux-0.8.3.orig/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh -+++ zfs-linux-0.8.3/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh -@@ -124,7 +124,7 @@ ds=$TESTPOOL/recv - log_must eval "zfs send $snap > $sendfile" - log_must eval "zfs recv -o encryption=on -o keyformat=passphrase" \ - "-o keylocation=file://$keyfile $ds < $sendfile" --log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" -+log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" - log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" - log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" - log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" -@@ -140,7 +140,7 @@ ds=$TESTPOOL/recv - log_must eval "zfs send -p $snap > $sendfile" - log_must eval "zfs recv -o encryption=on -o keyformat=passphrase" \ - "-o keylocation=file://$keyfile $ds < $sendfile" --log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" -+log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" - log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" - log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" - log_must test "$(get_prop 'keylocation' $ds)" == "file://$keyfile" -@@ -158,7 +158,7 @@ ds=$TESTPOOL/recv - log_must eval "zfs send -R $snap > $sendfile" - log_must eval "zfs recv -o encryption=on -o keyformat=passphrase" \ - "-o keylocation=file://$keyfile $ds < $sendfile" --log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" -+log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" - log_must test "$(get_prop 'encryptionroot' $ds)" == "$ds" - log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" - log_must test "$(get_prop 'keylocation' 
$ds)" == "file://$keyfile" -@@ -174,7 +174,7 @@ ds=$TESTPOOL/crypt/recv - log_must eval "zfs send -p $snap > $sendfile" - log_must eval "zfs recv -x encryption $ds < $sendfile" - log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" --log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" -+log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" - log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" - log_must test "$(get_prop 'mounted' $ds)" == "yes" - recv_cksum=$(md5digest /$ds/$TESTFILE0) -@@ -188,7 +188,7 @@ ds=$TESTPOOL/crypt/recv - log_must eval "zfs send -R $snap > $sendfile" - log_must eval "zfs recv -x encryption $ds < $sendfile" - log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" --log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" -+log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" - log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" - log_must test "$(get_prop 'mounted' $ds)" == "yes" - recv_cksum=$(md5digest /$ds/$TESTFILE0) -@@ -202,7 +202,7 @@ ds=$TESTPOOL/crypt/recv - log_must eval "zfs send -R $snap2 > $sendfile" - log_must eval "zfs recv -x encryption $ds < $sendfile" - log_must test "$(get_prop 'encryptionroot' $ds)" == "$TESTPOOL/crypt" --log_must test "$(get_prop 'encryption' $ds)" == "aes-256-ccm" -+log_must test "$(get_prop 'encryption' $ds)" == "aes-256-gcm" - log_must test "$(get_prop 'keyformat' $ds)" == "passphrase" - log_must test "$(get_prop 'mounted' $ds)" == "yes" - recv_cksum=$(md5digest /$ds/$TESTFILE0) diff '--exclude=.git' -Npaur ubuntu/zfs-linux/debian/patches/overlay.patch zfs-linux/debian/patches/overlay.patch --- ubuntu/zfs-linux/debian/patches/overlay.patch 1969-12-31 19:00:00.000000000 -0500 +++ zfs-linux/debian/patches/overlay.patch 2020-05-13 22:44:28.617584274 -0400 @@ -0,0 +1,14 @@ + +diff --git a/etc/default/zfs.in b/etc/default/zfs.in +index ce719734c0c..9439954b8ac 100644 +--- a/etc/default/zfs.in ++++ b/etc/default/zfs.in +@@ -66,7 +66,7 @@ VERBOSE_MOUNT='no' + # Should we allow overlay mounts? + # This is standard in Linux, but not ZFS which comes from Solaris where this + # is not allowed). +-DO_OVERLAY_MOUNTS='no' ++DO_OVERLAY_MOUNTS='yes' + + # Any additional option to the 'zfs import' commandline? + # Include '-o' for each option wanted. diff '--exclude=.git' -Npaur ubuntu/zfs-linux/debian/patches/pr10163.patch zfs-linux/debian/patches/pr10163.patch --- ubuntu/zfs-linux/debian/patches/pr10163.patch 1969-12-31 19:00:00.000000000 -0500 +++ zfs-linux/debian/patches/pr10163.patch 2020-05-13 14:42:37.000000000 -0400 @@ -0,0 +1,324 @@ +From 1335fc698150e42a21df5681a8b8bbbf6102b8fc Mon Sep 17 00:00:00 2001 +From: Matthew Ahrens +Date: Fri, 27 Mar 2020 10:45:25 -0700 +Subject: [PATCH] Improve ZVOL sync write performance by using a taskq + +== Summary == + +Prior to this change, sync writes to a zvol are processed serially. +This commit makes zvols process concurrently outstanding sync writes in +parallel, similar to how reads and async writes are already handled. +The result is that the throughput of sync writes is tripled. + +== Background == + +When a write comes in for a zvol (e.g. over iscsi), it is processed by +calling `zvol_request()` to initiate the operation. ZFS is expected to +later call `BIO_END_IO()` when the operation completes (possibly from a +different thread). There are a limited number of threads that are +available to call `zvol_request()` - one one per iscsi client (unless +using MC/S). 
Therefore, to ensure good performance, the latency of +`zvol_request()` is important, so that many i/o operations to the zvol +can be processed concurrently. In other words, if the client has +multiple outstanding requests to the zvol, the zvol should have multiple +outstanding requests to the storage hardware (i.e. issue multiple +concurrent `zio_t`'s). + +For reads and async writes (i.e. writes which can be acknowledged +before the data reaches stable storage), `zvol_request()` achieves low +latency by dispatching the bulk of the work (including waiting for i/o +to disk) to a taskq. The taskq callback (`zvol_read()` or +`zvol_write()`) blocks while waiting for the i/o to disk to complete. +The `zvol_taskq` has 32 threads (by default), so we can have up to 32 +concurrent i/os to disk in service of requests to zvols. + +However, for sync writes (i.e. writes which must be persisted to stable +storage before they can be acknowledged, by calling `zil_commit()`), +`zvol_request()` does not use `zvol_taskq`. Instead it blocks while +waiting for the ZIL write to disk to complete. This has the effect of +serializing sync writes to each zvol. In other words, each zvol will +only process one sync write at a time, waiting for it to be written to +the ZIL before accepting the next request. + +The same issue applies to FLUSH operations, for which `zvol_request()` +calls `zil_commit()` directly. + +== Description of change == + +This commit changes `zvol_request()` to use +`taskq_dispatch_ent(zvol_taskq)` for sync writes and FLUSH operations. +Therefore we can have up to 32 threads (the taskq threads) +simultaneously calling `zil_commit()`, for a theoretical performance +improvement of up to 32x. + +To avoid the locking issue described in the comment (which this commit +removes), we acquire the rangelock from the taskq callback (e.g. +`zvol_write()`) rather than from `zvol_request()`. This applies to all +writes (sync and async), reads, and discard operations. This means that +multiple simultaneously-outstanding i/o's which access the same block +can complete in any order. This was previously thought to be incorrect, +but a review of the block device interface requirements revealed that +this is fine - the order is inherently not defined. The shorter hold +time of the rangelock should also have a slight performance improvement. + +For an additional slight performance improvement, we use +`taskq_dispatch_ent()` instead of `taskq_dispatch()`, which avoids a +`kmem_alloc()` and eliminates a failure mode. This applies to all +writes (sync and async), reads, and discard operations. + +== Performance results == + +We used a zvol as an iSCSI target (server) for a Windows initiator +(client), with a single connection (the default - i.e. not MC/S). + +We used `diskspd` to generate a workload with 4 threads, doing 1MB +writes to random offsets in the zvol. Without this change we get +231MB/s, and with the change we get 728MB/s, which is 3.15x the original +performance. + +We ran a real-world workload, restoring an MSSQL database, and saw +throughput 2.5x the original. + +We saw more modest performance wins (typically 1.5x-2x) when using MC/S +with 4 connections, and with different numbers of client threads (1, 8, +32).
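The dispatch design this message describes is easy to model outside the kernel. The following is a minimal userspace analogue in C, with pthreads standing in for the SPL taskq API and every name illustrative rather than taken from the ZFS sources: the request path only enqueues the blocking zil_commit()-style work, and a small pool of workers performs it, so many sync writes can be in flight at once.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* One queued request; fn is the blocking work (cf. zvol_write()). */
typedef struct work {
    void (*fn)(void *);
    void *arg;
    struct work *next;
} work_t;

static work_t *head;
static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t qcv = PTHREAD_COND_INITIALIZER;

/* Cheap, non-blocking enqueue: this is the zvol_request() role. */
static void
dispatch(void (*fn)(void *), void *arg)
{
    work_t *w = malloc(sizeof (*w));
    w->fn = fn;
    w->arg = arg;
    pthread_mutex_lock(&qlock);
    w->next = head;             /* LIFO order is fine for a sketch */
    head = w;
    pthread_cond_signal(&qcv);
    pthread_mutex_unlock(&qlock);
}

/* Pool thread: performs the blocking part, then acknowledges. */
static void *
worker(void *unused)
{
    (void) unused;
    for (;;) {
        pthread_mutex_lock(&qlock);
        while (head == NULL)
            pthread_cond_wait(&qcv, &qlock);
        work_t *w = head;
        head = w->next;
        pthread_mutex_unlock(&qlock);
        w->fn(w->arg);          /* blocks here, not in the request path */
        free(w);
    }
    return (NULL);
}

/* Stands in for zvol_write() + zil_commit(): blocks, then acks. */
static void
sync_write(void *arg)
{
    usleep(1000);
    printf("acked request %ld\n", (long)arg);
}

int
main(void)
{
    pthread_t tids[4];          /* cf. the 32 zvol_taskq threads */
    for (int i = 0; i < 4; i++)
        pthread_create(&tids[i], NULL, worker, NULL);
    for (long r = 0; r < 8; r++)
        dispatch(sync_write, (void *)r);    /* returns immediately */
    sleep(1);                   /* crude: let the workers drain */
    return (0);
}

The mapping to the commit message is direct: before the change, the request thread effectively ran sync_write() itself, one request at a time per zvol; after it, up to pool-width requests can sit in zil_commit() concurrently.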
+ +Signed-off-by: Matthew Ahrens +--- + module/zfs/zvol.c | 122 ++++++++++++++++++++++------------ + 1 file changed, 78 insertions(+), 44 deletions(-) + +diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c +index ce719734c0c..9439954b8ac 100644 +--- a/module/zfs/zvol.c ++++ b/module/zfs/zvol.c +@@ -38,9 +38,6 @@ + * Copyright (c) 2016 Actifio, Inc. All rights reserved. + * Copyright (c) 2012, 2019 by Delphix. All rights reserved. + */ +-/* +- * Copyright (c) 2012, 2020 by Delphix. All rights reserved. +- */ + + /* + * Note on locking of zvol state structures. +@@ -723,7 +720,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_ + typedef struct zv_request { + zvol_state_t *zv; + struct bio *bio; +- taskq_ent_t ent; ++ zfs_locked_range_t *lr; + } zv_request_t; + + static void +@@ -752,18 +749,6 @@ zvol_write(void *arg) + ASSERT(zv && zv->zv_open_count > 0); + ASSERT(zv->zv_zilog != NULL); + +- /* bio marked as FLUSH need to flush before write */ +- if (bio_is_flush(bio)) +- zil_commit(zv->zv_zilog, ZVOL_OBJ); +- +- /* Some requests are just for flush and nothing else. */ +- if (uio.uio_resid == 0) { +- rw_exit(&zv->zv_suspend_lock); +- BIO_END_IO(bio, 0); +- kmem_free(zvr, sizeof (zv_request_t)); +- return; +- } +- + ssize_t start_resid = uio.uio_resid; + unsigned long start_jif = jiffies; + blk_generic_start_io_acct(zv->zv_queue, WRITE, bio_sectors(bio), +@@ -772,9 +757,6 @@ zvol_write(void *arg) + boolean_t sync = + bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; + +- zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock, +- uio.uio_loffset, uio.uio_resid, RL_WRITER); +- + uint64_t volsize = zv->zv_volsize; + while (uio.uio_resid > 0 && uio.uio_loffset < volsize) { + uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1); +@@ -801,7 +783,7 @@ zvol_write(void *arg) + if (error) + break; + } +- zfs_rangelock_exit(lr); ++ zfs_rangelock_exit(zvr->lr); + + int64_t nwritten = start_resid - uio.uio_resid; + dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten); +@@ -884,9 +866,6 @@ zvol_discard(void *arg) + if (start >= end) + goto unlock; + +- zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock, +- start, size, RL_WRITER); +- + tx = dmu_tx_create(zv->zv_objset); + dmu_tx_mark_netfree(tx); + error = dmu_tx_assign(tx, TXG_WAIT); +@@ -898,12 +877,12 @@ zvol_discard(void *arg) + error = dmu_free_long_range(zv->zv_objset, + ZVOL_OBJ, start, size); + } +- zfs_rangelock_exit(lr); ++unlock: ++ zfs_rangelock_exit(zvr->lr); + + if (error == 0 && sync) + zil_commit(zv->zv_zilog, ZVOL_OBJ); + +-unlock: + rw_exit(&zv->zv_suspend_lock); + blk_generic_end_io_acct(zv->zv_queue, WRITE, &zv->zv_disk->part0, + start_jif); +@@ -929,9 +908,6 @@ zvol_read(void *arg) + blk_generic_start_io_acct(zv->zv_queue, READ, bio_sectors(bio), + &zv->zv_disk->part0); + +- zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock, +- uio.uio_loffset, uio.uio_resid, RL_READER); +- + uint64_t volsize = zv->zv_volsize; + while (uio.uio_resid > 0 && uio.uio_loffset < volsize) { + uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1); +@@ -948,7 +924,7 @@ zvol_read(void *arg) + break; + } + } +- zfs_rangelock_exit(lr); ++ zfs_rangelock_exit(zvr->lr); + + int64_t nread = start_resid - uio.uio_resid; + dataset_kstats_update_read_kstats(&zv->zv_kstat, nread); +@@ -1063,15 +1039,16 @@ zvol_request(struct request_queue *q, st + } + + if (rw == WRITE) { ++ boolean_t need_sync = B_FALSE; ++ + if (unlikely(zv->zv_flags & ZVOL_RDONLY)) { + BIO_END_IO(bio, -SET_ERROR(EROFS)); + goto out; + } + + /* +- * 
Prevents the zvol from being suspended, or the ZIL being +- * concurrently opened. Will be released after the i/o +- * completes. ++ * To be released in the I/O function. See the comment on ++ * rangelock_enter() below. + */ + rw_enter(&zv->zv_suspend_lock, RW_READER); + +@@ -1092,55 +1069,47 @@ zvol_request(struct request_queue *q, st + rw_downgrade(&zv->zv_suspend_lock); + } + ++ /* bio marked as FLUSH need to flush before write */ ++ if (bio_is_flush(bio)) ++ zil_commit(zv->zv_zilog, ZVOL_OBJ); ++ ++ /* Some requests are just for flush and nothing else. */ ++ if (size == 0) { ++ rw_exit(&zv->zv_suspend_lock); ++ BIO_END_IO(bio, 0); ++ goto out; ++ } ++ + zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP); + zvr->zv = zv; + zvr->bio = bio; +- taskq_init_ent(&zvr->ent); + + /* +- * We don't want this thread to be blocked waiting for i/o to +- * complete, so we instead wait from a taskq callback. The +- * i/o may be a ZIL write (via zil_commit()), or a read of an +- * indirect block, or a read of a data block (if this is a +- * partial-block write). We will indicate that the i/o is +- * complete by calling BIO_END_IO() from the taskq callback. +- * +- * This design allows the calling thread to continue and +- * initiate more concurrent operations by calling +- * zvol_request() again. There are typically only a small +- * number of threads available to call zvol_request() (e.g. +- * one per iSCSI target), so keeping the latency of +- * zvol_request() low is important for performance. +- * +- * The zvol_request_sync module parameter allows this +- * behavior to be altered, for performance evaluation +- * purposes. If the callback blocks, setting +- * zvol_request_sync=1 will result in much worse performance. +- * +- * We can have up to zvol_threads concurrent i/o's being +- * processed for all zvols on the system. This is typically +- * a vast improvement over the zvol_request_sync=1 behavior +- * of one i/o at a time per zvol. However, an even better +- * design would be for zvol_request() to initiate the zio +- * directly, and then be notified by the zio_done callback, +- * which would call BIO_END_IO(). Unfortunately, the DMU/ZIL +- * interfaces lack this functionality (they block waiting for +- * the i/o to complete). ++ * To be released in the I/O function. Since the I/O functions ++ * are asynchronous, we take it here synchronously to make ++ * sure overlapped I/Os are properly ordered. + */ ++ zvr->lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, size, ++ RL_WRITER); ++ /* ++ * Sync writes and discards execute zil_commit() which may need ++ * to take a RL_READER lock on the whole block being modified ++ * via its zilog->zl_get_data(): to avoid circular dependency ++ * issues with taskq threads, execute these requests ++ * synchronously here in zvol_request().
++ */ ++ need_sync = bio_is_fua(bio) || ++ zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; + if (bio_is_discard(bio) || bio_is_secure_erase(bio)) { +- if (zvol_request_sync) { ++ if (zvol_request_sync || need_sync || ++ taskq_dispatch(zvol_taskq, zvol_discard, zvr, ++ TQ_SLEEP) == TASKQID_INVALID) + zvol_discard(zvr); +- } else { +- taskq_dispatch_ent(zvol_taskq, +- zvol_discard, zvr, 0, &zvr->ent); +- } + } else { +- if (zvol_request_sync) { ++ if (zvol_request_sync || need_sync || ++ taskq_dispatch(zvol_taskq, zvol_write, zvr, ++ TQ_SLEEP) == TASKQID_INVALID) + zvol_write(zvr); +- } else { +- taskq_dispatch_ent(zvol_taskq, +- zvol_write, zvr, 0, &zvr->ent); +- } + } + } else { + /* +@@ -1156,17 +1125,14 @@ zvol_request(struct request_queue *q, st + zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP); + zvr->zv = zv; + zvr->bio = bio; +- taskq_init_ent(&zvr->ent); + + rw_enter(&zv->zv_suspend_lock, RW_READER); + +- /* See comment in WRITE case above. */ +- if (zvol_request_sync) { ++ zvr->lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, size, ++ RL_READER); ++ if (zvol_request_sync || taskq_dispatch(zvol_taskq, ++ zvol_read, zvr, TQ_SLEEP) == TASKQID_INVALID) + zvol_read(zvr); +- } else { +- taskq_dispatch_ent(zvol_taskq, +- zvol_read, zvr, 0, &zvr->ent); +- } + } + + out: diff '--exclude=.git' -Npaur ubuntu/zfs-linux/debian/patches/pr10184.patch zfs-linux/debian/patches/pr10184.patch --- ubuntu/zfs-linux/debian/patches/pr10184.patch 1969-12-31 19:00:00.000000000 -0500 +++ zfs-linux/debian/patches/pr10184.patch 2020-05-13 19:22:23.600999520 -0400 @@ -0,0 +1,29 @@ +From 1eb1f33c6914e6419d4f4699bf45dfe8e288eeb7 Mon Sep 17 00:00:00 2001 +From: Matthew Ahrens +Date: Mon, 6 Apr 2020 08:01:45 -0700 +Subject: [PATCH] zvol_write() can use dmu_tx_hold_write_by_dnode() + +We can improve the performance of writes to zvols by using +dmu_tx_hold_write_by_dnode() instead of dmu_tx_hold_write(). This +reduces lock contention on the first block of the dnode object, and also +reduces the amount of CPU needed. The benefit will be highest with +multi-threaded async writes (i.e. writes that don't call zil_commit()). 
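The win is easiest to see side by side. The sketch below is an illustrative userspace model in C, not the DMU API: hold_by_id() stands in for dmu_tx_hold_write(), which resolves the object number through shared state on every call, while hold_by_ptr() stands in for dmu_tx_hold_write_by_dnode(), which reuses the handle (cf. zv->zv_dn) the caller already holds, so the lookup and the contended lock drop out of the write path.

#include <pthread.h>

typedef struct obj { long id; } obj_t;

static obj_t table[128];
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/* Resolve by number: every call takes the lock all writers share. */
obj_t *
hold_by_id(long id)
{
    pthread_mutex_lock(&table_lock);    /* contended on hot write paths */
    obj_t *o = &table[id % 128];
    pthread_mutex_unlock(&table_lock);
    return (o);
}

/* Reuse a pointer the caller already owns: no lookup, no shared lock. */
obj_t *
hold_by_ptr(obj_t *o)
{
    return (o);
}

int
main(void)
{
    obj_t *o = hold_by_id(7);           /* resolve once, up front */
    for (int i = 0; i < 1000000; i++)
        (void) hold_by_ptr(o);          /* per-write holds skip the lock */
    return (0);
}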
+ +Signed-off-by: Matthew Ahrens +--- + module/zfs/zvol.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c +index ce719734c0c..9439954b8ac 100644 +--- a/module/zfs/zvol.c ++++ b/module/zfs/zvol.c +@@ -125,7 +125,7 @@ zvol_write(void *arg) + if (bytes > volsize - off) /* don't write past the end */ + bytes = volsize - off; + +- dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); ++ dmu_tx_hold_write_by_dnode(tx, zv->zv_dn, off, bytes); + + /* This will only fail for ENOSPC */ + error = dmu_tx_assign(tx, TXG_WAIT); diff '--exclude=.git' -Npaur ubuntu/zfs-linux/debian/patches/series zfs-linux/debian/patches/series --- ubuntu/zfs-linux/debian/patches/series 2020-05-16 17:46:19.019566887 -0400 +++ zfs-linux/debian/patches/series 2020-05-16 17:52:12.944742162 -0400 @@ -1,21 +1,23 @@ -0001-Prevent-manual-builds-in-the-DKMS-source.patch -0002-Check-for-META-and-DCH-consistency-in-autoconf.patch +#0001-Prevent-manual-builds-in-the-DKMS-source.patch +#0002-Check-for-META-and-DCH-consistency-in-autoconf.patch 0003-relocate-zvol_wait.patch enable-zed.patch -1004-zed-service-bindir.patch -2100-zfs-load-module.patch -2200-add-zfs-0.6.x-ioctl-compat-shim.patch +#1004-zed-service-bindir.patch +2101-zfs-load-module.patch +#2200-add-zfs-0.6.x-ioctl-compat-shim.patch 3100-remove-libzfs-module-timeout.patch 3302-Use-obj-m-instead-of-subdir-m.patch -4000-zsys-support.patch +4001-zsys-support.patch 4100-disable-bpool-upgrade.patch force-verbose-rules.patch #unapplied/init-debian-openrc-workaround.patch # OpenRC users can apply this locally -4550-Linux-5.5-compat-blkg_tryget.patch -4600-Linux-5.6-compat-struct-proc_ops.patch -4601-Linux-5.6-compat-timestamp_truncate.patch -4602-Linux-5.6-compat-ktime_get_raw_ts64.patch -4603-Linux-5.6-compat-time_t.patch +#4550-Linux-5.5-compat-blkg_tryget.patch +#4600-Linux-5.6-compat-struct-proc_ops.patch +#4601-Linux-5.6-compat-timestamp_truncate.patch +#4602-Linux-5.6-compat-ktime_get_raw_ts64.patch +#4603-Linux-5.6-compat-time_t.patch zfs-mount-container-start.patch -4610-ICP-Improve-AES-GCM-performance.patch +pr10163.patch +pr10184.patch +overlay.patch diff '--exclude=.git' -Npaur ubuntu/zfs-linux/debian/rules zfs-linux/debian/rules --- ubuntu/zfs-linux/debian/rules 2020-05-16 17:46:48.147546638 -0400 +++ zfs-linux/debian/rules 2020-05-13 16:27:11.358747557 -0400 @@ -56,6 +56,9 @@ endif --with-systemdunitdir=/lib/systemd/system \ --with-systemdpresetdir=/lib/systemd/system-preset \ --with-systemdgeneratordir=/lib/systemd/system-generators \ + --with-dracutdir=/usr/lib/dracut \ + --sysconfdir=/etc \ + --exec-prefix=/usr \ --with-config=user override_dh_gencontrol: @@ -165,7 +168,7 @@ endif override_dh_install: find . -name lib*.la -delete - dh_install --fail-missing + dh_install --list-missing override_dh_installinit: dh_installinit -r --no-restart-after-upgrade --name zfs-import
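One loose end from the aes_impl.h and modes.h hunks above: both warn that field offsets are hard coded in the assembly (aesni-gcm-x86_64.S, ghash-x86_64.S), e.g. that gcm_Htable must sit directly after gcm_H. Constraints like that can be pinned down at build time rather than left to a comment. A minimal C11 sketch, using an illustrative copy of the struct rather than the real header, so the field set and sizes here are assumptions:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Illustrative stand-in for the gcm_ctx fields named in the hunk above. */
typedef struct gcm_ctx_demo {
    uint64_t gcm_ghash[2];
    uint64_t gcm_H[2];
    uint64_t gcm_Htable[12][2];
} gcm_ctx_demo_t;

/* Fail the build if gcm_Htable does not start right where gcm_H ends. */
static_assert(offsetof(gcm_ctx_demo_t, gcm_Htable) ==
    offsetof(gcm_ctx_demo_t, gcm_H) + 2 * sizeof (uint64_t),
    "gcm_Htable must immediately follow gcm_H for the asm routines");

int
main(void)
{
    return (0);
}

Applied against the real gcm_ctx_t with the offsets the assembler actually uses, an assertion like this would turn a silent layout break into a compile error.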